mirror of
https://github.com/libretro/ppsspp.git
synced 2025-01-30 21:33:56 +00:00
Move color conversion funcs to ColorConv.
This paves the way a bit for NEON conversion funcs.
This commit is contained in:
parent
3f29329ed2
commit
1767bd958c
@ -336,4 +336,135 @@ void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels) {
|
||||
u16 col0 = src[x];
|
||||
ARGB8From565(col0, &dst[x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Converts numPixels 16-bit pixels from src to dst, reversing the order of the
// four 4-bit fields inside every pixel (bits 15..12 <-> 3..0, 11..8 <-> 7..4).
//
// With _M_SSE defined, groups of 8 pixels are handled with aligned SSE2
// loads/stores, but only when both src and dst are 16-byte aligned. Anything
// left over is processed two pixels at a time, then one final odd pixel.
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) {
	const u32 pairCount = numPixels / 2;
	u32 pair = 0;

#ifdef _M_SSE
	// _mm_load_si128/_mm_store_si128 need 16-byte alignment; skip the vector path otherwise.
	const int aligned16 = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	const u32 vecCount = aligned16 ? numPixels / 8 : 0;

	const __m128i keepBits7to4 = _mm_set1_epi16(0x00F0);
	const __m128i keepBits11to8 = _mm_set1_epi16(0x0F00);
	const __m128i *vsrc = (const __m128i *)src;
	__m128i *vdst = (__m128i *)dst;

	for (u32 n = 0; n < vecCount; ++n) {
		const __m128i px = _mm_load_si128(vsrc + n);
		// The 16-bit lane shifts discard bits pushed out of a lane, so the outer fields need no mask.
		const __m128i toBits3to0 = _mm_srli_epi16(px, 12);
		const __m128i toBits7to4 = _mm_and_si128(_mm_srli_epi16(px, 4), keepBits7to4);
		const __m128i toBits11to8 = _mm_and_si128(_mm_slli_epi16(px, 4), keepBits11to8);
		const __m128i toBits15to12 = _mm_slli_epi16(px, 12);
		_mm_store_si128(vdst + n, _mm_or_si128(_mm_or_si128(toBits3to0, toBits7to4),
		                                       _mm_or_si128(toBits11to8, toBits15to12)));
	}

	// Every vector covered 8 pixels, which is 4 of the pixel pairs handled below.
	pair = vecCount * 4;
#endif

	// NOTE(review): two pixels are read/written at once through u32 pointers;
	// this presumes the buffers tolerate 32-bit access - confirm against callers.
	const u32 *srcPairs = (const u32 *)src;
	u32 *dstPairs = (u32 *)dst;
	while (pair < pairCount) {
		const u32 px = srcPairs[pair];
		const u32 toBits3to0 = (px >> 12) & 0x000F000F;
		const u32 toBits7to4 = (px >> 4) & 0x00F000F0;
		const u32 toBits11to8 = (px << 4) & 0x0F000F00;
		const u32 toBits15to12 = (px << 12) & 0xF000F000;
		dstPairs[pair] = toBits3to0 | toBits7to4 | toBits11to8 | toBits15to12;
		++pair;
	}

	// An odd pixel count leaves one trailing pixel that no pair covered.
	if ((numPixels & 1) != 0) {
		const u32 last = numPixels - 1;
		const u16 px = src[last];
		dst[last] = ((px >> 12) & 0x000F) |
		            ((px >> 4) & 0x00F0) |
		            ((px << 4) & 0x0F00) |
		            ((px << 12) & 0xF000);
	}
}
|
||||
|
||||
// Converts numPixels 16-bit pixels from src to dst, reversing the field order
// inside every pixel: the top bit (15) moves to bit 0, and the three 5-bit
// fields below it (14..10, 9..5, 4..0) move to bits 5..1, 10..6 and 15..11.
//
// With _M_SSE defined, groups of 8 pixels are handled with aligned SSE2
// loads/stores, but only when both src and dst are 16-byte aligned. Anything
// left over is processed two pixels at a time, then one final odd pixel.
void ConvertRGBA5551ToABGR1555(u16 *dst, const u16 *src, const u32 numPixels) {
	const u32 pairCount = numPixels / 2;
	u32 pair = 0;

#ifdef _M_SSE
	// _mm_load_si128/_mm_store_si128 need 16-byte alignment; skip the vector path otherwise.
	const int aligned16 = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	const u32 vecCount = aligned16 ? numPixels / 8 : 0;

	const __m128i keepBits5to1 = _mm_set1_epi16(0x003E);
	const __m128i keepBits10to6 = _mm_set1_epi16(0x07C0);
	const __m128i *vsrc = (const __m128i *)src;
	__m128i *vdst = (__m128i *)dst;

	for (u32 n = 0; n < vecCount; ++n) {
		const __m128i px = _mm_load_si128(vsrc + n);
		// The 16-bit lane shifts discard bits pushed out of a lane, so the outer fields need no mask.
		const __m128i toBit0 = _mm_srli_epi16(px, 15);
		const __m128i toBits5to1 = _mm_and_si128(_mm_srli_epi16(px, 9), keepBits5to1);
		const __m128i toBits10to6 = _mm_and_si128(_mm_slli_epi16(px, 1), keepBits10to6);
		const __m128i toBits15to11 = _mm_slli_epi16(px, 11);
		_mm_store_si128(vdst + n, _mm_or_si128(_mm_or_si128(toBit0, toBits5to1),
		                                       _mm_or_si128(toBits10to6, toBits15to11)));
	}

	// Every vector covered 8 pixels, which is 4 of the pixel pairs handled below.
	pair = vecCount * 4;
#endif

	// NOTE(review): two pixels are read/written at once through u32 pointers;
	// this presumes the buffers tolerate 32-bit access - confirm against callers.
	const u32 *srcPairs = (const u32 *)src;
	u32 *dstPairs = (u32 *)dst;
	while (pair < pairCount) {
		const u32 px = srcPairs[pair];
		const u32 toBit0 = (px >> 15) & 0x00010001;
		const u32 toBits5to1 = (px >> 9) & 0x003E003E;
		const u32 toBits10to6 = (px << 1) & 0x07C007C0;
		const u32 toBits15to11 = (px << 11) & 0xF800F800;
		dstPairs[pair] = toBit0 | toBits5to1 | toBits10to6 | toBits15to11;
		++pair;
	}

	// An odd pixel count leaves one trailing pixel that no pair covered.
	if ((numPixels & 1) != 0) {
		const u32 last = numPixels - 1;
		const u16 px = src[last];
		dst[last] = ((px >> 15) & 0x0001) |
		            ((px >> 9) & 0x003E) |
		            ((px << 1) & 0x07C0) |
		            ((px << 11) & 0xF800);
	}
}
|
||||
|
||||
// Converts numPixels 16-bit pixels from src to dst, swapping the top 5-bit
// field (bits 15..11) with the bottom 5-bit field (bits 4..0) of every pixel
// and leaving the middle 6-bit field (bits 10..5) where it is.
//
// With _M_SSE defined, groups of 8 pixels are handled with aligned SSE2
// loads/stores, but only when both src and dst are 16-byte aligned. Anything
// left over is processed two pixels at a time, then one final odd pixel.
void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, const u32 numPixels) {
	const u32 pairCount = numPixels / 2;
	u32 pair = 0;

#ifdef _M_SSE
	// _mm_load_si128/_mm_store_si128 need 16-byte alignment; skip the vector path otherwise.
	const int aligned16 = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	const u32 vecCount = aligned16 ? numPixels / 8 : 0;

	const __m128i keepMiddle6 = _mm_set1_epi16(0x07E0);
	const __m128i *vsrc = (const __m128i *)src;
	__m128i *vdst = (__m128i *)dst;

	for (u32 n = 0; n < vecCount; ++n) {
		const __m128i px = _mm_load_si128(vsrc + n);
		// The 16-bit lane shifts discard bits pushed out of a lane, so the outer fields need no mask.
		const __m128i toBottom5 = _mm_srli_epi16(px, 11);
		const __m128i middle6 = _mm_and_si128(px, keepMiddle6);
		const __m128i toTop5 = _mm_slli_epi16(px, 11);
		_mm_store_si128(vdst + n, _mm_or_si128(_mm_or_si128(toBottom5, middle6), toTop5));
	}

	// Every vector covered 8 pixels, which is 4 of the pixel pairs handled below.
	pair = vecCount * 4;
#endif

	// NOTE(review): two pixels are read/written at once through u32 pointers;
	// this presumes the buffers tolerate 32-bit access - confirm against callers.
	const u32 *srcPairs = (const u32 *)src;
	u32 *dstPairs = (u32 *)dst;
	while (pair < pairCount) {
		const u32 px = srcPairs[pair];
		const u32 toBottom5 = (px >> 11) & 0x001F001F;
		const u32 middle6 = px & 0x07E007E0;
		const u32 toTop5 = (px << 11) & 0xF800F800;
		dstPairs[pair] = toBottom5 | middle6 | toTop5;
		++pair;
	}

	// An odd pixel count leaves one trailing pixel that no pair covered.
	if ((numPixels & 1) != 0) {
		const u32 last = numPixels - 1;
		const u16 px = src[last];
		dst[last] = ((px >> 11) & 0x001F) |
		            (px & 0x07E0) |
		            ((px << 11) & 0xF800);
	}
}
|
||||
|
@ -105,6 +105,7 @@ void convert5551_dx9(u16* data, u32* out, int width, int l, int u);
|
||||
// "Complete" set of color conversion functions between the usual formats.
|
||||
|
||||
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels);
|
||||
#define ConvertRGBA8888ToBGRA8888 ConvertBGRA8888ToRGBA8888
|
||||
|
||||
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertRGBA8888ToRGB565(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
@ -121,3 +122,7 @@ void ConvertRGBA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertBGRA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertBGRA5551ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
|
||||
|
||||
// Reverses the order of the four 4-bit fields in each of numPixels 16-bit pixels, reading src and writing dst.
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
// Reverses the field order of each of numPixels 16-bit pixels: the top bit moves to bit 0 and the three 5-bit fields swap ends.
void ConvertRGBA5551ToABGR1555(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
// Swaps the top and bottom 5-bit fields of each of numPixels 16-bit pixels, keeping the middle 6 bits in place.
void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, const u32 numPixels);
|
||||
|
@ -787,128 +787,18 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n
|
||||
u32 *dst = (u32 *)dstBuf;
|
||||
switch (dstFmt) {
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
{
|
||||
#ifdef _M_SSE
|
||||
const __m128i maskB = _mm_set1_epi16(0x00F0);
|
||||
const __m128i maskG = _mm_set1_epi16(0x0F00);
|
||||
|
||||
__m128i *srcp = (__m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
const int sseChunks = numPixels / 8;
|
||||
for (int i = 0; i < sseChunks; ++i) {
|
||||
__m128i c = _mm_load_si128(&srcp[i]);
|
||||
__m128i v = _mm_srli_epi16(c, 12);
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), maskB));
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 4), maskG));
|
||||
v = _mm_or_si128(v, _mm_slli_epi16(c, 12));
|
||||
_mm_store_si128(&dstp[i], v);
|
||||
}
|
||||
// The remainder is done in chunks of 2, SSE was chunks of 8.
|
||||
int i = sseChunks * 8 / 2;
|
||||
#else
|
||||
int i = 0;
|
||||
// TODO: NEON.
|
||||
#endif
|
||||
for (; i < (numPixels + 1) / 2; i++) {
|
||||
u32 c = src[i];
|
||||
dst[i] = ((c >> 12) & 0x000F000F) |
|
||||
((c >> 4) & 0x00F000F0) |
|
||||
((c << 4) & 0x0F000F00) |
|
||||
((c << 12) & 0xF000F000);
|
||||
}
|
||||
}
|
||||
ConvertRGBA4444ToABGR4444((u16 *)dst, (const u16 *)src, numPixels);
|
||||
break;
|
||||
// Final Fantasy 2 uses this heavily in animated textures.
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
{
|
||||
#ifdef _M_SSE
|
||||
const __m128i maskB = _mm_set1_epi16(0x003E);
|
||||
const __m128i maskG = _mm_set1_epi16(0x07C0);
|
||||
|
||||
__m128i *srcp = (__m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
const int sseChunks = numPixels / 8;
|
||||
for (int i = 0; i < sseChunks; ++i) {
|
||||
__m128i c = _mm_load_si128(&srcp[i]);
|
||||
__m128i v = _mm_srli_epi16(c, 15);
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 9), maskB));
|
||||
v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 1), maskG));
|
||||
v = _mm_or_si128(v, _mm_slli_epi16(c, 11));
|
||||
_mm_store_si128(&dstp[i], v);
|
||||
}
|
||||
// The remainder is done in chunks of 2, SSE was chunks of 8.
|
||||
int i = sseChunks * 8 / 2;
|
||||
#else
|
||||
int i = 0;
|
||||
// TODO: NEON.
|
||||
#endif
|
||||
for (; i < (numPixels + 1) / 2; i++) {
|
||||
u32 c = src[i];
|
||||
dst[i] = ((c >> 15) & 0x00010001) |
|
||||
((c >> 9) & 0x003E003E) |
|
||||
((c << 1) & 0x07C007C0) |
|
||||
((c << 11) & 0xF800F800);
|
||||
}
|
||||
}
|
||||
ConvertRGBA5551ToABGR1555((u16 *)dst, (const u16 *)src, numPixels);
|
||||
break;
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
{
|
||||
#ifdef _M_SSE
|
||||
const __m128i maskG = _mm_set1_epi16(0x07E0);
|
||||
|
||||
__m128i *srcp = (__m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
const int sseChunks = numPixels / 8;
|
||||
for (int i = 0; i < sseChunks; ++i) {
|
||||
__m128i c = _mm_load_si128(&srcp[i]);
|
||||
__m128i v = _mm_srli_epi16(c, 11);
|
||||
v = _mm_or_si128(v, _mm_and_si128(c, maskG));
|
||||
v = _mm_or_si128(v, _mm_slli_epi16(c, 11));
|
||||
_mm_store_si128(&dstp[i], v);
|
||||
}
|
||||
// The remainder is done in chunks of 2, SSE was chunks of 8.
|
||||
int i = sseChunks * 8 / 2;
|
||||
#else
|
||||
int i = 0;
|
||||
// TODO: NEON.
|
||||
#endif
|
||||
for (; i < (numPixels + 1) / 2; i++) {
|
||||
u32 c = src[i];
|
||||
dst[i] = ((c >> 11) & 0x001F001F) |
|
||||
((c >> 0) & 0x07E007E0) |
|
||||
((c << 11) & 0xF800F800);
|
||||
}
|
||||
}
|
||||
ConvertRGB565ToBGR565((u16 *)dst, (const u16 *)src, numPixels);
|
||||
break;
|
||||
default:
|
||||
if (UseBGRA8888()) {
|
||||
#ifdef _M_SSE
|
||||
const __m128i maskGA = _mm_set1_epi32(0xFF00FF00);
|
||||
|
||||
__m128i *srcp = (__m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
const int sseChunks = numPixels / 4;
|
||||
for (int i = 0; i < sseChunks; ++i) {
|
||||
__m128i c = _mm_load_si128(&srcp[i]);
|
||||
__m128i rb = _mm_andnot_si128(maskGA, c);
|
||||
c = _mm_and_si128(c, maskGA);
|
||||
|
||||
__m128i b = _mm_srli_epi32(rb, 16);
|
||||
__m128i r = _mm_slli_epi32(rb, 16);
|
||||
c = _mm_or_si128(_mm_or_si128(c, r), b);
|
||||
_mm_store_si128(&dstp[i], c);
|
||||
}
|
||||
// The remainder starts right after those done via SSE.
|
||||
int i = sseChunks * 4;
|
||||
#else
|
||||
int i = 0;
|
||||
#endif
|
||||
for (; i < numPixels; i++) {
|
||||
u32 c = src[i];
|
||||
dst[i] = ((c >> 16) & 0x000000FF) |
|
||||
((c >> 0) & 0xFF00FF00) |
|
||||
((c << 16) & 0x00FF0000);
|
||||
}
|
||||
ConvertRGBA8888ToBGRA8888(dst, src, numPixels);
|
||||
} else {
|
||||
// No need to convert RGBA8888, right order already
|
||||
if (dst != src)
|
||||
|
Loading…
x
Reference in New Issue
Block a user