diff --git a/CMakeLists.txt b/CMakeLists.txt index b018c10f2..421b1596d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -290,7 +290,8 @@ set(CommonExtra) if(ARM) set(CommonExtra ${CommonExtra} Common/ArmCPUDetect.cpp - Common/ArmThunk.cpp) + Common/ArmThunk.cpp + Common/ColorConvNEON.cpp) elseif(X86) set(CommonExtra ${CommonExtra} Common/ABI.cpp diff --git a/Common/ColorConv.cpp b/Common/ColorConv.cpp index 68f9febf1..4e5fb210c 100644 --- a/Common/ColorConv.cpp +++ b/Common/ColorConv.cpp @@ -15,9 +15,15 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. - #include "ColorConv.h" -#include "CommonTypes.h" +// NEON is in a separate file so that it can be compiled with a runtime check. +#include "ColorConvNEON.h" +#include "Common.h" +#include "CPUDetect.h" + +#ifdef _M_SSE +#include <emmintrin.h> +#endif inline u16 RGBA8888toRGB565(u32 px) { return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); @@ -269,8 +275,51 @@ void ConvertRGBA8888ToRGBA4444(u16 *dst, const u32 *src, const u32 numPixels) { } void ConvertRGBA565ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i mask5 = _mm_set1_epi16(0x001f); + const __m128i mask6 = _mm_set1_epi16(0x003f); + const __m128i mask8 = _mm_set1_epi16(0x00ff); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst32; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst32 & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + + // Swizzle, resulting in RR00 RR00. + __m128i r = _mm_and_si128(c, mask5); + r = _mm_or_si128(_mm_slli_epi16(r, 3), _mm_srli_epi16(r, 2)); + r = _mm_and_si128(r, mask8); + + // This one becomes 00GG 00GG. 
+ __m128i g = _mm_and_si128(_mm_srli_epi16(c, 5), mask6); + g = _mm_or_si128(_mm_slli_epi16(g, 2), _mm_srli_epi16(g, 4)); + g = _mm_slli_epi16(g, 8); + + // Almost done, we aim for BB00 BB00 again here. + __m128i b = _mm_and_si128(_mm_srli_epi16(c, 11), mask5); + b = _mm_or_si128(_mm_slli_epi16(b, 3), _mm_srli_epi16(b, 2)); + b = _mm_and_si128(b, mask8); + + // Always set to 00FF 00FF. + __m128i a = _mm_slli_epi16(mask8, 8); + + // Now combine them, RRGG RRGG and BBAA BBAA, and then interleave. + const __m128i rg = _mm_or_si128(r, g); + const __m128i ba = _mm_or_si128(b, a); + _mm_store_si128(&dstp[i * 2 + 0], _mm_unpacklo_epi16(rg, ba)); + _mm_store_si128(&dstp[i * 2 + 1], _mm_unpackhi_epi16(rg, ba)); + } + u32 i = sseChunks * 8; +#else + u32 i = 0; +#endif + u8 *dst = (u8 *)dst32; - for (u32 x = 0; x < numPixels; x++) { + for (u32 x = i; x < numPixels; x++) { u16 col = src[x]; dst[x * 4] = Convert5To8((col) & 0x1f); dst[x * 4 + 1] = Convert6To8((col >> 5) & 0x3f); @@ -280,8 +329,52 @@ void ConvertRGBA565ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) { } void ConvertRGBA5551ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i mask5 = _mm_set1_epi16(0x001f); + const __m128i mask8 = _mm_set1_epi16(0x00ff); + const __m128i one = _mm_set1_epi16(0x0001); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst32; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst32 & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + + // Swizzle, resulting in RR00 RR00. + __m128i r = _mm_and_si128(c, mask5); + r = _mm_or_si128(_mm_slli_epi16(r, 3), _mm_srli_epi16(r, 2)); + r = _mm_and_si128(r, mask8); + + // This one becomes 00GG 00GG. 
+ __m128i g = _mm_and_si128(_mm_srli_epi16(c, 5), mask5); + g = _mm_or_si128(_mm_slli_epi16(g, 3), _mm_srli_epi16(g, 2)); + g = _mm_slli_epi16(g, 8); + + // Almost done, we aim for BB00 BB00 again here. + __m128i b = _mm_and_si128(_mm_srli_epi16(c, 10), mask5); + b = _mm_or_si128(_mm_slli_epi16(b, 3), _mm_srli_epi16(b, 2)); + b = _mm_and_si128(b, mask8); + + // Expand the 1 bit A to 00AA 00AA (AA is 00 or FF). + __m128i a = _mm_srli_epi16(c, 15); + a = _mm_slli_epi16(_mm_cmpeq_epi16(a, one), 8); + + // Now combine them, RRGG RRGG and BBAA BBAA, and then interleave. + const __m128i rg = _mm_or_si128(r, g); + const __m128i ba = _mm_or_si128(b, a); + _mm_store_si128(&dstp[i * 2 + 0], _mm_unpacklo_epi16(rg, ba)); + _mm_store_si128(&dstp[i * 2 + 1], _mm_unpackhi_epi16(rg, ba)); + } + u32 i = sseChunks * 8; +#else + u32 i = 0; +#endif + u8 *dst = (u8 *)dst32; - for (u32 x = 0; x < numPixels; x++) { + for (u32 x = i; x < numPixels; x++) { u16 col = src[x]; dst[x * 4] = Convert5To8((col) & 0x1f); dst[x * 4 + 1] = Convert5To8((col >> 5) & 0x1f); @@ -290,9 +383,50 @@ void ConvertRGBA5551ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) } } +// TODO: This seems to be BGRA4444 -> RGBA8888? void ConvertRGBA4444ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i mask4 = _mm_set1_epi16(0x000f); + const __m128i mask8 = _mm_set1_epi16(0x00ff); + const __m128i one = _mm_set1_epi16(0x0001); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst32; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst32 & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + + // Let's just grab R000 R000, without swizzling yet. + __m128i r = _mm_and_si128(_mm_srli_epi16(c, 8), mask4); + // And then 00G0 00G0. + __m128i g = _mm_and_si128(_mm_srli_epi16(c, 4), mask4); + g = _mm_slli_epi16(g, 8); + // Now B000 B000. 
+ __m128i b = _mm_and_si128(c, mask4); + // And lastly 00A0 00A0. No mask needed, we have a wall. + __m128i a = _mm_srli_epi16(c, 12); + a = _mm_slli_epi16(a, 8); + + // We swizzle after combining - R0G0 R0G0 and B0A0 B0A0 -> RRGG RRGG and BBAA BBAA. + __m128i rg = _mm_or_si128(r, g); + __m128i ba = _mm_or_si128(b, a); + rg = _mm_or_si128(rg, _mm_slli_epi16(rg, 4)); + ba = _mm_or_si128(ba, _mm_slli_epi16(ba, 4)); + + // And then we can store. + _mm_store_si128(&dstp[i * 2 + 0], _mm_unpacklo_epi16(rg, ba)); + _mm_store_si128(&dstp[i * 2 + 1], _mm_unpackhi_epi16(rg, ba)); + } + u32 i = sseChunks * 8; +#else + u32 i = 0; +#endif + u8 *dst = (u8 *)dst32; - for (u32 x = 0; x < numPixels; x++) { + for (u32 x = i; x < numPixels; x++) { u16 col = src[x]; dst[x * 4] = Convert4To8((col >> 8) & 0xf); dst[x * 4 + 1] = Convert4To8((col >> 4) & 0xf); @@ -301,6 +435,7 @@ void ConvertRGBA4444ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) } } +// TODO: This seems to be ABGR4444 -> RGBA8888? void ConvertBGRA4444ToRGBA8888(u32 *dst32, const u16 *src, const u32 numPixels) { u8 *dst = (u8 *)dst32; for (u32 x = 0; x < numPixels; x++) { @@ -332,4 +467,135 @@ void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels) { u16 col0 = src[x]; ARGB8From565(col0, &dst[x]); } -} \ No newline at end of file +} + +void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i maskB = _mm_set1_epi16(0x00F0); + const __m128i maskG = _mm_set1_epi16(0x0F00); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + __m128i v = _mm_srli_epi16(c, 12); + v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), maskB)); + v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 4), maskG)); + v = _mm_or_si128(v, 
_mm_slli_epi16(c, 12)); + _mm_store_si128(&dstp[i], v); + } + // The remainder is done in chunks of 2, SSE was chunks of 8. + u32 i = sseChunks * 8 / 2; +#else + u32 i = 0; +#endif + + const u32 *src32 = (const u32 *)src; + u32 *dst32 = (u32 *)dst; + for (; i < numPixels / 2; i++) { + const u32 c = src32[i]; + dst32[i] = ((c >> 12) & 0x000F000F) | + ((c >> 4) & 0x00F000F0) | + ((c << 4) & 0x0F000F00) | + ((c << 12) & 0xF000F000); + } + + if (numPixels & 1) { + const u32 i = numPixels - 1; + const u16 c = src[i]; + dst[i] = ((c >> 12) & 0x000F) | + ((c >> 4) & 0x00F0) | + ((c << 4) & 0x0F00) | + ((c << 12) & 0xF000); + } +} + +void ConvertRGBA5551ToABGR1555(u16 *dst, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i maskB = _mm_set1_epi16(0x003E); + const __m128i maskG = _mm_set1_epi16(0x07C0); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + __m128i v = _mm_srli_epi16(c, 15); + v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 9), maskB)); + v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 1), maskG)); + v = _mm_or_si128(v, _mm_slli_epi16(c, 11)); + _mm_store_si128(&dstp[i], v); + } + // The remainder is done in chunks of 2, SSE was chunks of 8. 
+ u32 i = sseChunks * 8 / 2; +#else + u32 i = 0; +#endif + + const u32 *src32 = (const u32 *)src; + u32 *dst32 = (u32 *)dst; + for (; i < numPixels / 2; i++) { + const u32 c = src32[i]; + dst32[i] = ((c >> 15) & 0x00010001) | + ((c >> 9) & 0x003E003E) | + ((c << 1) & 0x07C007C0) | + ((c << 11) & 0xF800F800); + } + + if (numPixels & 1) { + const u32 i = numPixels - 1; + const u16 c = src[i]; + dst[i] = ((c >> 15) & 0x0001) | + ((c >> 9) & 0x003E) | + ((c << 1) & 0x07C0) | + ((c << 11) & 0xF800); + } +} + +void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, const u32 numPixels) { +#ifdef _M_SSE + const __m128i maskG = _mm_set1_epi16(0x07E0); + + const __m128i *srcp = (const __m128i *)src; + __m128i *dstp = (__m128i *)dst; + u32 sseChunks = numPixels / 8; + if (((intptr_t)src & 0xF) || ((intptr_t)dst & 0xF)) { + sseChunks = 0; + } + for (u32 i = 0; i < sseChunks; ++i) { + const __m128i c = _mm_load_si128(&srcp[i]); + __m128i v = _mm_srli_epi16(c, 11); + v = _mm_or_si128(v, _mm_and_si128(c, maskG)); + v = _mm_or_si128(v, _mm_slli_epi16(c, 11)); + _mm_store_si128(&dstp[i], v); + } + // The remainder is done in chunks of 2, SSE was chunks of 8. + u32 i = sseChunks * 8 / 2; +#else + u32 i = 0; +#endif + + const u32 *src32 = (const u32 *)src; + u32 *dst32 = (u32 *)dst; + for (; i < numPixels / 2; i++) { + const u32 c = src32[i]; + dst32[i] = ((c >> 11) & 0x001F001F) | + ((c >> 0) & 0x07E007E0) | + ((c << 11) & 0xF800F800); + } + + if (numPixels & 1) { + const u32 i = numPixels - 1; + const u16 c = src[i]; + dst[i] = ((c >> 11) & 0x001F) | + ((c >> 0) & 0x07E0) | + ((c << 11) & 0xF800); + } +} diff --git a/Common/ColorConv.h b/Common/ColorConv.h index 220cc2bf1..9b1d6c56e 100644 --- a/Common/ColorConv.h +++ b/Common/ColorConv.h @@ -105,6 +105,7 @@ void convert5551_dx9(u16* data, u32* out, int width, int l, int u); // "Complete" set of color conversion functions between the usual formats. 
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels); +#define ConvertRGBA8888ToBGRA8888 ConvertBGRA8888ToRGBA8888 void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels); void ConvertRGBA8888ToRGB565(u16 *dst, const u32 *src, const u32 numPixels); @@ -121,3 +122,7 @@ void ConvertRGBA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels); void ConvertBGRA4444ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels); void ConvertBGRA5551ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels); void ConvertBGR565ToRGBA8888(u32 *dst, const u16 *src, const u32 numPixels); + +void ConvertRGBA4444ToABGR4444(u16 *dst, const u16 *src, const u32 numPixels); +void ConvertRGBA5551ToABGR1555(u16 *dst, const u16 *src, const u32 numPixels); +void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, const u32 numPixels); diff --git a/Common/ColorConvNEON.cpp b/Common/ColorConvNEON.cpp new file mode 100644 index 000000000..0dd96641e --- /dev/null +++ b/Common/ColorConvNEON.cpp @@ -0,0 +1,23 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include <arm_neon.h> +#include "ColorConvNEON.h" +#include "Common.h" +#include "CPUDetect.h" + +// TODO: NEON color conversion funcs. 
diff --git a/Common/ColorConvNEON.h b/Common/ColorConvNEON.h new file mode 100644 index 000000000..cb8715702 --- /dev/null +++ b/Common/ColorConvNEON.h @@ -0,0 +1,20 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "ColorConv.h" diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 7d151892d..12214ead7 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -194,6 +194,7 @@ + @@ -242,6 +243,12 @@ true true + + true + true + true + true + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index d16458767..36a3aee9c 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -46,6 +46,7 @@ + @@ -81,6 +82,7 @@ + diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 77ef5cf4e..c711149e3 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -787,128 +787,18 @@ static void ConvertColors(void *dstBuf, const void *srcBuf, GLuint dstFmt, int n u32 *dst = (u32 *)dstBuf; switch (dstFmt) { case GL_UNSIGNED_SHORT_4_4_4_4: - { -#ifdef _M_SSE - const __m128i maskB = _mm_set1_epi16(0x00F0); - const __m128i maskG = _mm_set1_epi16(0x0F00); - - __m128i *srcp = (__m128i *)src; - __m128i *dstp = (__m128i *)dst; - const int sseChunks = numPixels / 8; - 
for (int i = 0; i < sseChunks; ++i) { - __m128i c = _mm_load_si128(&srcp[i]); - __m128i v = _mm_srli_epi16(c, 12); - v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 4), maskB)); - v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 4), maskG)); - v = _mm_or_si128(v, _mm_slli_epi16(c, 12)); - _mm_store_si128(&dstp[i], v); - } - // The remainder is done in chunks of 2, SSE was chunks of 8. - int i = sseChunks * 8 / 2; -#else - int i = 0; - // TODO: NEON. -#endif - for (; i < (numPixels + 1) / 2; i++) { - u32 c = src[i]; - dst[i] = ((c >> 12) & 0x000F000F) | - ((c >> 4) & 0x00F000F0) | - ((c << 4) & 0x0F000F00) | - ((c << 12) & 0xF000F000); - } - } + ConvertRGBA4444ToABGR4444((u16 *)dst, (const u16 *)src, numPixels); break; // Final Fantasy 2 uses this heavily in animated textures. case GL_UNSIGNED_SHORT_5_5_5_1: - { -#ifdef _M_SSE - const __m128i maskB = _mm_set1_epi16(0x003E); - const __m128i maskG = _mm_set1_epi16(0x07C0); - - __m128i *srcp = (__m128i *)src; - __m128i *dstp = (__m128i *)dst; - const int sseChunks = numPixels / 8; - for (int i = 0; i < sseChunks; ++i) { - __m128i c = _mm_load_si128(&srcp[i]); - __m128i v = _mm_srli_epi16(c, 15); - v = _mm_or_si128(v, _mm_and_si128(_mm_srli_epi16(c, 9), maskB)); - v = _mm_or_si128(v, _mm_and_si128(_mm_slli_epi16(c, 1), maskG)); - v = _mm_or_si128(v, _mm_slli_epi16(c, 11)); - _mm_store_si128(&dstp[i], v); - } - // The remainder is done in chunks of 2, SSE was chunks of 8. - int i = sseChunks * 8 / 2; -#else - int i = 0; - // TODO: NEON. 
-#endif - for (; i < (numPixels + 1) / 2; i++) { - u32 c = src[i]; - dst[i] = ((c >> 15) & 0x00010001) | - ((c >> 9) & 0x003E003E) | - ((c << 1) & 0x07C007C0) | - ((c << 11) & 0xF800F800); - } - } + ConvertRGBA5551ToABGR1555((u16 *)dst, (const u16 *)src, numPixels); break; case GL_UNSIGNED_SHORT_5_6_5: - { -#ifdef _M_SSE - const __m128i maskG = _mm_set1_epi16(0x07E0); - - __m128i *srcp = (__m128i *)src; - __m128i *dstp = (__m128i *)dst; - const int sseChunks = numPixels / 8; - for (int i = 0; i < sseChunks; ++i) { - __m128i c = _mm_load_si128(&srcp[i]); - __m128i v = _mm_srli_epi16(c, 11); - v = _mm_or_si128(v, _mm_and_si128(c, maskG)); - v = _mm_or_si128(v, _mm_slli_epi16(c, 11)); - _mm_store_si128(&dstp[i], v); - } - // The remainder is done in chunks of 2, SSE was chunks of 8. - int i = sseChunks * 8 / 2; -#else - int i = 0; - // TODO: NEON. -#endif - for (; i < (numPixels + 1) / 2; i++) { - u32 c = src[i]; - dst[i] = ((c >> 11) & 0x001F001F) | - ((c >> 0) & 0x07E007E0) | - ((c << 11) & 0xF800F800); - } - } + ConvertRGB565ToBGR565((u16 *)dst, (const u16 *)src, numPixels); break; default: if (UseBGRA8888()) { -#ifdef _M_SSE - const __m128i maskGA = _mm_set1_epi32(0xFF00FF00); - - __m128i *srcp = (__m128i *)src; - __m128i *dstp = (__m128i *)dst; - const int sseChunks = numPixels / 4; - for (int i = 0; i < sseChunks; ++i) { - __m128i c = _mm_load_si128(&srcp[i]); - __m128i rb = _mm_andnot_si128(maskGA, c); - c = _mm_and_si128(c, maskGA); - - __m128i b = _mm_srli_epi32(rb, 16); - __m128i r = _mm_slli_epi32(rb, 16); - c = _mm_or_si128(_mm_or_si128(c, r), b); - _mm_store_si128(&dstp[i], c); - } - // The remainder starts right after those done via SSE. 
- int i = sseChunks * 4; -#else - int i = 0; -#endif - for (; i < numPixels; i++) { - u32 c = src[i]; - dst[i] = ((c >> 16) & 0x000000FF) | - ((c >> 0) & 0xFF00FF00) | - ((c << 16) & 0x00FF0000); - } + ConvertRGBA8888ToBGRA8888(dst, src, numPixels); } else { // No need to convert RGBA8888, right order already if (dst != src) diff --git a/Qt/Common.pro b/Qt/Common.pro index 871c3cbaa..2bea1516b 100644 --- a/Qt/Common.pro +++ b/Qt/Common.pro @@ -43,6 +43,8 @@ win32 { HEADERS += $$P/Common/MemArena.h } +armv7: SOURCES += $$P/Common/ColorConvNEON.cpp + SOURCES += $$P/Common/ChunkFile.cpp \ $$P/Common/ColorConv.cpp \ $$P/Common/ConsoleListener.cpp \ diff --git a/android/jni/Android.mk b/android/jni/Android.mk index c209bba91..0170171c9 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -58,6 +58,7 @@ ARCH_FILES := \ $(SRC)/Common/ArmEmitter.cpp \ $(SRC)/Common/ArmCPUDetect.cpp \ $(SRC)/Common/ArmThunk.cpp \ + $(SRC)/Common/ColorConvNEON.cpp.neon \ $(SRC)/Core/MIPS/ARM/ArmCompALU.cpp \ $(SRC)/Core/MIPS/ARM/ArmCompBranch.cpp \ $(SRC)/Core/MIPS/ARM/ArmCompFPU.cpp \