mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 08:39:51 +00:00
Move some more color conversion functions to ColorConv
This commit is contained in:
parent
1285ae4a61
commit
c2cf2bd97e
@ -19,3 +19,203 @@
|
||||
#include "ColorConv.h"
|
||||
#include "CommonTypes.h"
|
||||
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444_gl(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val >> 12) & 0xF) * 17;
|
||||
u32 g = ((val >> 8) & 0xF) * 17;
|
||||
u32 b = ((val >> 4) & 0xF) * 17;
|
||||
u32 a = ((val >> 0) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565_gl(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val >> 11) & 0x1F);
|
||||
u32 g = Convert6To8((val >> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val)& 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551_gl(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val >> 11) & 0x1F);
|
||||
u32 g = Convert5To8((val >> 6) & 0x1F);
|
||||
u32 b = Convert5To8((val >> 1) & 0x1F);
|
||||
u32 a = (val & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444_dx9(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val >> 0) & 0xF) * 17;
|
||||
u32 g = ((val >> 4) & 0xF) * 17;
|
||||
u32 b = ((val >> 8) & 0xF) * 17;
|
||||
u32 a = ((val >> 12) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565_dx9(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val)& 0x1F);
|
||||
u32 g = Convert6To8((val >> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val >> 11) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551_dx9(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val >> 0) & 0x1F);
|
||||
u32 g = Convert5To8((val >> 5) & 0x1F);
|
||||
u32 b = Convert5To8((val >> 10) & 0x1F);
|
||||
u32 a = ((val >> 15) & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Copies numPixels 32-bit pixels from src to dst, exchanging the byte in bits 0-7
// with the byte in bits 16-23 and leaving bits 8-15 and 24-31 where they are.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#ifdef _M_SSE
	// Aligned loads and stores are used, so the vector path is only taken when
	// both buffers start on a 16-byte boundary.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned) {
		const __m128i keepGA = _mm_set1_epi32(0xFF00FF00);
		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		const u32 vecCount = numPixels / 4;
		for (u32 v = 0; v < vecCount; ++v) {
			const __m128i px = _mm_load_si128(in + v);
			const __m128i ga = _mm_and_si128(px, keepGA);
			// Isolate bytes 0 and 2, then move each into the other's position.
			const __m128i swapSrc = _mm_andnot_si128(keepGA, px);
			const __m128i toHigh = _mm_slli_epi32(swapSrc, 16);
			const __m128i toLow = _mm_srli_epi32(swapSrc, 16);
			_mm_store_si128(outv + v, _mm_or_si128(_mm_or_si128(ga, toHigh), toLow));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		const u32 px = src[pos];
		const u32 ga = px & 0xFF00FF00;
		const u32 toHigh = (px << 16) & 0x00FF0000;
		const u32 toLow = (px >> 16) & 0x000000FF;
		dst[pos] = toLow | ga | toHigh;
	}
}
|
||||
|
||||
// Packs numPixels 32-bit 8888 pixels from src into 16-bit 5551 pixels in dst.
// Per pixel: bits 3-7 go to bits 0-4, bits 11-15 to bits 5-9, bits 19-23 to
// bits 10-14, and bit 31 to bit 15.
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#if _M_SSE >= 0x401
	// The vector path needs 16-byte aligned buffers (aligned load/store) and
	// SSE 4.1 at runtime, which _mm_packus_epi32 requires.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned && cpu_info.bSSE4_1) {
		const __m128i keepAG = _mm_set1_epi32(0x8000F800);
		const __m128i keepRB = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		// Two input vectors (8 pixels) fill one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			const __m128i pxA = _mm_load_si128(in + v);
			const __m128i pxB = _mm_load_si128(in + v + 1);

			// Bits 3-7 drop to 0-4 (>> 3) and bits 19-23 drop to 10-14 (>> 9).
			const __m128i rbA = _mm_and_si128(pxA, keepRB);
			const __m128i rbB = _mm_and_si128(pxB, keepRB);
			const __m128i rbOutA = _mm_or_si128(_mm_srli_epi32(rbA, 3), _mm_srli_epi32(rbA, 9));
			const __m128i rbOutB = _mm_or_si128(_mm_srli_epi32(rbB, 3), _mm_srli_epi32(rbB, 9));

			// Bit 31 drops to 15 (>> 16) and bits 11-15 drop to 5-9 (>> 6).
			const __m128i agA = _mm_and_si128(pxA, keepAG);
			const __m128i agB = _mm_and_si128(pxB, keepAG);
			const __m128i agOutA = _mm_or_si128(_mm_srli_epi32(agA, 16), _mm_srli_epi32(agA, 6));
			const __m128i agOutB = _mm_or_si128(_mm_srli_epi32(agB, 16), _mm_srli_epi32(agB, 6));

			// Drop the stray copies left above bit 15 before packing to 16 bits.
			const __m128i outA = _mm_and_si128(_mm_or_si128(agOutA, rbOutA), low16);
			const __m128i outB = _mm_and_si128(_mm_or_si128(agOutB, rbOutB), low16);

			_mm_store_si128(outv + v / 2, _mm_packus_epi32(outA, outB));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		dst[pos] = RGBA8888toRGBA5551(src[pos]);
	}
}
|
||||
|
||||
// Packs numPixels 32-bit 8888 pixels from src into 16-bit 5551 pixels in dst,
// exchanging the first and third channels on the way.
// Per pixel: bits 19-23 go to bits 0-4, bits 11-15 to bits 5-9, bits 3-7 to
// bits 10-14, and bit 31 to bit 15.
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#if _M_SSE >= 0x401
	// The vector path needs 16-byte aligned buffers (aligned load/store) and
	// SSE 4.1 at runtime, which _mm_packus_epi32 requires.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned && cpu_info.bSSE4_1) {
		const __m128i keepAG = _mm_set1_epi32(0x8000F800);
		const __m128i keepRB = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		// Two input vectors (8 pixels) fill one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			const __m128i pxA = _mm_load_si128(in + v);
			const __m128i pxB = _mm_load_si128(in + v + 1);

			// Bits 19-23 drop to 0-4 (>> 19) and bits 3-7 rise to 10-14 (<< 7).
			const __m128i rbA = _mm_and_si128(pxA, keepRB);
			const __m128i rbB = _mm_and_si128(pxB, keepRB);
			const __m128i rbOutA = _mm_or_si128(_mm_srli_epi32(rbA, 19), _mm_slli_epi32(rbA, 7));
			const __m128i rbOutB = _mm_or_si128(_mm_srli_epi32(rbB, 19), _mm_slli_epi32(rbB, 7));

			// Bit 31 drops to 15 (>> 16) and bits 11-15 drop to 5-9 (>> 6).
			const __m128i agA = _mm_and_si128(pxA, keepAG);
			const __m128i agB = _mm_and_si128(pxB, keepAG);
			const __m128i agOutA = _mm_or_si128(_mm_srli_epi32(agA, 16), _mm_srli_epi32(agA, 6));
			const __m128i agOutB = _mm_or_si128(_mm_srli_epi32(agB, 16), _mm_srli_epi32(agB, 6));

			// Drop the stray copies left above bit 15 before packing to 16 bits.
			const __m128i outA = _mm_and_si128(_mm_or_si128(agOutA, rbOutA), low16);
			const __m128i outB = _mm_and_si128(_mm_or_si128(agOutB, rbOutB), low16);

			_mm_store_si128(outv + v / 2, _mm_packus_epi32(outA, outB));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		dst[pos] = BGRA8888toRGBA5551(src[pos]);
	}
}
|
||||
|
@ -54,6 +54,10 @@ inline u16 BGRA8888toRGBA4444(u32 px) {
|
||||
return ((px >> 20) & 0x000F) | ((px >> 8) & 0x00F0) | ((px << 4) & 0x0F00) | ((px >> 16) & 0xF000);
|
||||
}
|
||||
|
||||
// Packs one 32-bit 8888 pixel into 16-bit 5551, exchanging the first and third channels:
// bits 19-23 -> bits 0-4, bits 11-15 -> bits 5-9, bits 3-7 -> bits 10-14, bit 31 -> bit 15.
inline u16 BGRA8888toRGBA5551(u32 px) {
	const u32 low5 = (px >> 19) & 0x001F;
	const u32 mid5 = (px >> 6) & 0x03E0;
	const u32 high5 = (px << 7) & 0x7C00;
	const u32 alphaBit = (px >> 16) & 0x8000;
	return low5 | mid5 | high5 | alphaBit;
}
|
||||
|
||||
// Packs one 32-bit 8888 pixel into 16-bit 5551, keeping the channel order:
// bits 3-7 -> bits 0-4, bits 11-15 -> bits 5-9, bits 19-23 -> bits 10-14, bit 31 -> bit 15.
inline u16 RGBA8888toRGBA5551(u32 px) {
	const u32 low5 = (px >> 3) & 0x001F;
	const u32 mid5 = (px >> 6) & 0x03E0;
	const u32 high5 = (px >> 9) & 0x7C00;
	const u32 alphaBit = (px >> 16) & 0x8000;
	return low5 | mid5 | high5 | alphaBit;
}
|
||||
@ -156,3 +160,15 @@ inline u16 RGBA8888To4444(u32 value)
|
||||
const u16 a = (c >> 12) & 0xF000;
|
||||
return r | g | b | a;
|
||||
}
|
||||
|
||||
// Row-range converters from 16-bit pixel formats to 32-bit 8888, parallelizable.
// Each call converts rows [l, u) of an image that is `width` pixels wide, reading
// from `data` and writing to `out` at the same pixel index.
// The _gl and _dx9 variants read the 16-bit fields in mirrored bit order.
|
||||
void convert4444_gl(u16* data, u32* out, int width, int l, int u);
|
||||
void convert565_gl(u16* data, u32* out, int width, int l, int u);
|
||||
void convert5551_gl(u16* data, u32* out, int width, int l, int u);
|
||||
void convert4444_dx9(u16* data, u32* out, int width, int l, int u);
|
||||
void convert565_dx9(u16* data, u32* out, int width, int l, int u);
|
||||
void convert5551_dx9(u16* data, u32* out, int width, int l, int u);
|
||||
|
||||
// Bulk converters: each processes numPixels pixels from src into dst.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
|
@ -328,124 +328,3 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch) {
|
||||
dst += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies numPixels 32-bit pixels from src to dst, exchanging the byte in bits 0-7
// with the byte in bits 16-23 and leaving bits 8-15 and 24-31 where they are.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#ifdef _M_SSE
	// Aligned loads and stores are used, so the vector path is only taken when
	// both buffers start on a 16-byte boundary.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned) {
		const __m128i keepGA = _mm_set1_epi32(0xFF00FF00);
		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		const u32 vecCount = numPixels / 4;
		for (u32 v = 0; v < vecCount; ++v) {
			const __m128i px = _mm_load_si128(in + v);
			const __m128i ga = _mm_and_si128(px, keepGA);
			// Isolate bytes 0 and 2, then move each into the other's position.
			const __m128i swapSrc = _mm_andnot_si128(keepGA, px);
			const __m128i toHigh = _mm_slli_epi32(swapSrc, 16);
			const __m128i toLow = _mm_srli_epi32(swapSrc, 16);
			_mm_store_si128(outv + v, _mm_or_si128(_mm_or_si128(ga, toHigh), toLow));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		const u32 px = src[pos];
		const u32 ga = px & 0xFF00FF00;
		const u32 toHigh = (px << 16) & 0x00FF0000;
		const u32 toLow = (px >> 16) & 0x000000FF;
		dst[pos] = toLow | ga | toHigh;
	}
}
|
||||
|
||||
// Packs numPixels 32-bit 8888 pixels from src into 16-bit 5551 pixels in dst.
// Per pixel: bits 3-7 go to bits 0-4, bits 11-15 to bits 5-9, bits 19-23 to
// bits 10-14, and bit 31 to bit 15.
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#if _M_SSE >= 0x401
	// The vector path needs 16-byte aligned buffers (aligned load/store) and
	// SSE 4.1 at runtime, which _mm_packus_epi32 requires.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned && cpu_info.bSSE4_1) {
		const __m128i keepAG = _mm_set1_epi32(0x8000F800);
		const __m128i keepRB = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		// Two input vectors (8 pixels) fill one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			const __m128i pxA = _mm_load_si128(in + v);
			const __m128i pxB = _mm_load_si128(in + v + 1);

			// Bits 3-7 drop to 0-4 (>> 3) and bits 19-23 drop to 10-14 (>> 9).
			const __m128i rbA = _mm_and_si128(pxA, keepRB);
			const __m128i rbB = _mm_and_si128(pxB, keepRB);
			const __m128i rbOutA = _mm_or_si128(_mm_srli_epi32(rbA, 3), _mm_srli_epi32(rbA, 9));
			const __m128i rbOutB = _mm_or_si128(_mm_srli_epi32(rbB, 3), _mm_srli_epi32(rbB, 9));

			// Bit 31 drops to 15 (>> 16) and bits 11-15 drop to 5-9 (>> 6).
			const __m128i agA = _mm_and_si128(pxA, keepAG);
			const __m128i agB = _mm_and_si128(pxB, keepAG);
			const __m128i agOutA = _mm_or_si128(_mm_srli_epi32(agA, 16), _mm_srli_epi32(agA, 6));
			const __m128i agOutB = _mm_or_si128(_mm_srli_epi32(agB, 16), _mm_srli_epi32(agB, 6));

			// Drop the stray copies left above bit 15 before packing to 16 bits.
			const __m128i outA = _mm_and_si128(_mm_or_si128(agOutA, rbOutA), low16);
			const __m128i outB = _mm_and_si128(_mm_or_si128(agOutB, rbOutB), low16);

			_mm_store_si128(outv + v / 2, _mm_packus_epi32(outA, outB));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		dst[pos] = RGBA8888toRGBA5551(src[pos]);
	}
}
|
||||
|
||||
// Packs one 32-bit 8888 pixel into 16-bit 5551, exchanging the first and third channels:
// bits 19-23 -> bits 0-4, bits 11-15 -> bits 5-9, bits 3-7 -> bits 10-14, bit 31 -> bit 15.
inline u16 BGRA8888toRGBA5551(u32 px) {
	const u32 low5 = (px >> 19) & 0x001F;
	const u32 mid5 = (px >> 6) & 0x03E0;
	const u32 high5 = (px << 7) & 0x7C00;
	const u32 alphaBit = (px >> 16) & 0x8000;
	return low5 | mid5 | high5 | alphaBit;
}
|
||||
|
||||
// Packs numPixels 32-bit 8888 pixels from src into 16-bit 5551 pixels in dst,
// exchanging the first and third channels on the way.
// Per pixel: bits 19-23 go to bits 0-4, bits 11-15 to bits 5-9, bits 3-7 to
// bits 10-14, and bit 31 to bit 15.
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 pos = 0;
#if _M_SSE >= 0x401
	// The vector path needs 16-byte aligned buffers (aligned load/store) and
	// SSE 4.1 at runtime, which _mm_packus_epi32 requires.
	const bool aligned = ((((intptr_t)src) | ((intptr_t)dst)) & 0xF) == 0;
	if (aligned && cpu_info.bSSE4_1) {
		const __m128i keepAG = _mm_set1_epi32(0x8000F800);
		const __m128i keepRB = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *outv = (__m128i *)dst;
		// Two input vectors (8 pixels) fill one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			const __m128i pxA = _mm_load_si128(in + v);
			const __m128i pxB = _mm_load_si128(in + v + 1);

			// Bits 19-23 drop to 0-4 (>> 19) and bits 3-7 rise to 10-14 (<< 7).
			const __m128i rbA = _mm_and_si128(pxA, keepRB);
			const __m128i rbB = _mm_and_si128(pxB, keepRB);
			const __m128i rbOutA = _mm_or_si128(_mm_srli_epi32(rbA, 19), _mm_slli_epi32(rbA, 7));
			const __m128i rbOutB = _mm_or_si128(_mm_srli_epi32(rbB, 19), _mm_slli_epi32(rbB, 7));

			// Bit 31 drops to 15 (>> 16) and bits 11-15 drop to 5-9 (>> 6).
			const __m128i agA = _mm_and_si128(pxA, keepAG);
			const __m128i agB = _mm_and_si128(pxB, keepAG);
			const __m128i agOutA = _mm_or_si128(_mm_srli_epi32(agA, 16), _mm_srli_epi32(agA, 6));
			const __m128i agOutB = _mm_or_si128(_mm_srli_epi32(agB, 16), _mm_srli_epi32(agB, 6));

			// Drop the stray copies left above bit 15 before packing to 16 bits.
			const __m128i outA = _mm_and_si128(_mm_or_si128(agOutA, rbOutA), low16);
			const __m128i outB = _mm_and_si128(_mm_or_si128(agOutB, rbOutB), low16);

			_mm_store_si128(outv + v / 2, _mm_packus_epi32(outA, outB));
		}
		// The scalar loop picks up right after the pixels handled above.
		pos = vecCount * 4;
	}
#endif
	for (; pos < numPixels; ++pos) {
		dst[pos] = BGRA8888toRGBA5551(src[pos]);
	}
}
|
||||
|
@ -216,7 +216,3 @@ inline void DeIndexTexture4Optimal(ClutT *dest, const u32 texaddr, int length, C
|
||||
const u8 *indexed = (const u8 *) Memory::GetPointer(texaddr);
|
||||
DeIndexTexture4Optimal(dest, indexed, length, color);
|
||||
}
|
||||
|
||||
// Bulk pixel-format converters: each processes numPixels pixels from src into dst.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
|
@ -55,49 +55,6 @@
|
||||
/////////////////////////////////////// Helper Functions (mostly math for parallelization)
|
||||
|
||||
namespace {
|
||||
//////////////////////////////////////////////////////////////////// Color space conversion
|
||||
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val>> 0) & 0xF) * 17;
|
||||
u32 g = ((val>> 4) & 0xF) * 17;
|
||||
u32 b = ((val>> 8) & 0xF) * 17;
|
||||
u32 a = ((val>>12) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val ) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val>>11) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>> 0) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 5) & 0x1F);
|
||||
u32 b = Convert5To8((val>>10) & 0x1F);
|
||||
u32 a = ((val >> 15) & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Various image processing
|
||||
|
||||
// Extracts the lowest byte (bits 0-7) of _col.
// The argument is parenthesized so that compound expressions expand safely.
#define R(_col) (((_col) >> 0) & 0xFF)
|
||||
@ -678,15 +635,15 @@ void TextureScalerDX9::ConvertTo8888(u32 format, u32* source, u32* &dest, int wi
|
||||
break;
|
||||
|
||||
case D3DFMT_A4R4G4B4:
|
||||
GlobalThreadPool::Loop(std::bind(&convert4444, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert4444_dx9, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case D3DFMT_R5G6B5:
|
||||
GlobalThreadPool::Loop(std::bind(&convert565, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert565_dx9, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case D3DFMT_A1R5G5B5:
|
||||
GlobalThreadPool::Loop(std::bind(&convert5551, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert5551_dx9, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -49,49 +49,6 @@
|
||||
/////////////////////////////////////// Helper Functions (mostly math for parallelization)
|
||||
|
||||
namespace {
|
||||
//////////////////////////////////////////////////////////////////// Color space conversion
|
||||
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val ) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 6) & 0x1F);
|
||||
u32 b = Convert5To8((val>> 1) & 0x1F);
|
||||
u32 a = (val & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Various image processing
|
||||
|
||||
// Extracts the lowest byte (bits 0-7) of _col.
// The argument is parenthesized so that compound expressions expand safely.
#define R(_col) (((_col) >> 0) & 0xFF)
|
||||
@ -670,15 +627,15 @@ void TextureScaler::ConvertTo8888(GLenum format, u32* source, u32* &dest, int wi
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
GlobalThreadPool::Loop(std::bind(&convert4444, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert4444_gl, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
GlobalThreadPool::Loop(std::bind(&convert565, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert565_gl, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
GlobalThreadPool::Loop(std::bind(&convert5551, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convert5551_gl, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
#include "headless/Compare.h"
|
||||
#include "file/file_util.h"
|
||||
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Host.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
|
Loading…
Reference in New Issue
Block a user