Add a NEON 4444 flip color conversion func.

This commit is contained in:
Unknown W. Brackets 2015-05-23 10:46:08 -07:00
parent df53bc2e7b
commit 4900bc9082
4 changed files with 46 additions and 7 deletions

View File

@ -469,10 +469,9 @@ void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels) {
}
}
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) {
void ConvertRGBA4444ToABGR4444Basic(u16 *dst, const u16 *src, const u32 numPixels) {
#ifdef _M_SSE
const __m128i maskB = _mm_set1_epi16(0x00F0);
const __m128i maskG = _mm_set1_epi16(0x0F00);
const __m128i mask0040 = _mm_set1_epi16(0x00F0);
const __m128i *srcp = (const __m128i *)src;
__m128i *dstp = (__m128i *)dst;
@ -483,8 +482,8 @@ void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) {
for (u32 i = 0; i < sseChunks; ++i) {
const __m128i c = _mm_load_si128(&srcp[i]);
__m128i v = _mm_srli_epi16(c, 12);
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), maskB));
v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 4), maskG));
v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), mask0040));
v = _mm_or_si128(v, _mm_slli_epi16(_mm_and_si128(c, mask0040), 4));
v = _mm_or_si128(v, _mm_slli_epi16(c, 12));
_mm_store_si128(&dstp[i], v);
}
@ -600,7 +599,9 @@ void ConvertRGB565ToBGR565Basic(u16 *dst, const u16 *src, const u32 numPixels) {
}
}
#ifndef ConvertRGBA5551ToABGR1555
// Reuse the logic from the header - if these aren't defined, we need externs.
#ifndef ConvertRGBA4444ToABGR4444
Convert16bppTo16bppFunc ConvertRGBA4444ToABGR4444 = &ConvertRGBA4444ToABGR4444Basic;
Convert16bppTo16bppFunc ConvertRGBA5551ToABGR1555 = &ConvertRGBA5551ToABGR1555Basic;
Convert16bppTo16bppFunc ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565Basic;
#endif
@ -608,6 +609,7 @@ Convert16bppTo16bppFunc ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565Basic;
void SetupColorConv() {
#if defined(HAVE_ARMV7) && !defined(ARM64)
if (cpu_info.bNEON) {
ConvertRGBA4444ToABGR4444 = &ConvertRGBA4444ToABGR4444NEON;
ConvertRGBA5551ToABGR1555 = &ConvertRGBA5551ToABGR1555NEON;
ConvertRGB565ToBGR565 = &ConvertRGB565ToBGR565NEON;
}

View File

@ -130,10 +130,18 @@ void ConvertBGRA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
void ConvertBGRA5551ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels);
void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels);
void ConvertRGBA4444ToABGR4444Basic(u16 *dst, const u16 *src, const u32 numPixels);
void ConvertRGBA5551ToABGR1555Basic(u16 *dst, const u16 *src, const u32 numPixels);
void ConvertRGB565ToBGR565Basic(u16 *dst, const u16 *src, const u32 numPixels);
// Selects what the name ConvertRGBA4444ToABGR4444 refers to for this build.
#if defined(ARM64)
// ARM64 builds: the name is an alias for the NEON implementation.
#define ConvertRGBA4444ToABGR4444 ConvertRGBA4444ToABGR4444NEON
#elif !defined(ARM)
// Non-ARM builds: the name is an alias for the Basic implementation.
#define ConvertRGBA4444ToABGR4444 ConvertRGBA4444ToABGR4444Basic
#else
// Remaining (ARM, non-ARM64) builds: the name is a function pointer variable
// defined elsewhere, so the implementation it points at can be assigned at runtime.
extern Convert16bppTo16bppFunc ConvertRGBA4444ToABGR4444;
#endif
#if defined(ARM64)
#define ConvertRGBA5551ToABGR1555 ConvertRGBA5551ToABGR1555NEON
#elif !defined(ARM)

View File

@ -26,6 +26,34 @@
// TODO: More NEON color conversion funcs.
// Converts numPixels 16-bit pixels from src to dst, reversing the order of the
// four 4-bit fields in each pixel (nibbles 3,2,1,0 become 0,1,2,3).
//
// The bulk of the work is done 8 pixels at a time with NEON; any remaining
// (numPixels % 8) pixels at the end are converted by
// ConvertRGBA4444ToABGR4444Basic.
//
// dst:       output pixels; numPixels entries are written.
// src:       input pixels; numPixels entries are read.
// numPixels: number of 16-bit pixels to convert.
void ConvertRGBA4444ToABGR4444NEON(u16 *dst, const u16 *src, const u32 numPixels) {
	// Selects the second-lowest nibble (bits 4-7) of every 16-bit lane.
	const uint16x8_t mask00F0 = vdupq_n_u16(0x00F0);

	// Largest multiple of 8 that fits in numPixels.
	const u32 simdable = (numPixels / 8) * 8;

	const u16 *srcp = src;
	u16 *dstp = dst;
	for (u32 i = 0; i < simdable; i += 8) {
		const uint16x8_t c = vld1q_u16(srcp);

		// Top nibble moves to the bottom.
		const uint16x8_t a = vshrq_n_u16(c, 12);
		// Nibble 2 moves down to nibble 1.
		const uint16x8_t b = vandq_u16(vshrq_n_u16(c, 4), mask00F0);
		// Nibble 1 moves up to nibble 2.
		const uint16x8_t g = vshlq_n_u16(vandq_u16(c, mask00F0), 4);
		// Bottom nibble moves to the top.
		const uint16x8_t r = vshlq_n_u16(c, 12);

		const uint16x8_t res = vorrq_u16(vorrq_u16(r, g), vorrq_u16(b, a));
		vst1q_u16(dstp, res);

		srcp += 8;
		dstp += 8;
	}

	// Finish off the rest, if there were any outside the simdable range.
	if (numPixels > simdable) {
		// srcp/dstp already point just past the last SIMD-converted pixel, so
		// the tail must start from them - NOT from the original src/dst, which
		// would redo the first pixels and leave the tail unconverted.
		ConvertRGBA4444ToABGR4444Basic(dstp, srcp, numPixels - simdable);
	}
}
void ConvertRGBA5551ToABGR1555NEON(u16 *dst, const u16 *src, const u32 numPixels) {
const uint16x8_t maskB = vdupq_n_u16(0x003E);
const uint16x8_t maskG = vdupq_n_u16(0x07C0);

View File

@ -19,5 +19,6 @@
#include "ColorConv.h"
void ConvertRGBA4444ToABGR4444NEON(u16 *dst, const u16 *src, const u32 numPixels);
void ConvertRGBA5551ToABGR1555NEON(u16 *dst, const u16 *src, const u32 numPixels);
void ConvertRGB565ToBGR565NEON(u16 *dst, const u16 *src, const u32 numPixels);