Merge pull request #15482 from hrydgard/simplify-checkalpha
Delete a lot of specialized alpha checking code.
Commit 9b25093585
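The gist of the simplification, as far as the diff shows: instead of one hand-written scalar/SSE2/NEON checker per pixel format, every format now funnels through the same AND-reduction over the pixels, compared against a per-format full-alpha mask at the end. A minimal standalone sketch of that idea (the pixel values are made up):

#include <cstdint>

typedef uint32_t u32;

enum CheckAlphaResult {
	CHECKALPHA_FULL,
	CHECKALPHA_ANY,
};

// AND all pixels together; any alpha bit that is 0 in any pixel ends up 0 in the mask.
static u32 AndReduce(const u32 *px, int n) {
	u32 mask = 0xFFFFFFFF;
	for (int i = 0; i < n; i++)
		mask &= px[i];
	return mask;
}

int main() {
	const u32 pixels[3] = { 0xFF00FF00, 0xFF123456, 0x80ABCDEF };  // made-up RGBA8888 pixels
	const u32 fullAlphaMask = 0xFF000000;  // the alpha bits for 8888, as used throughout the diff
	u32 alphaSum = AndReduce(pixels, 3);
	CheckAlphaResult res = (alphaSum & fullAlphaMask) == fullAlphaMask ? CHECKALPHA_FULL : CHECKALPHA_ANY;
	return res == CHECKALPHA_ANY ? 0 : 1;  // the 0x80 alpha byte forces CHECKALPHA_ANY
}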
@@ -839,6 +839,7 @@ void ReplacedTexture::PrepareData(int level) {
 	int w, h, f;
 	uint8_t *image;

 	if (LoadZIMPtr(&zim[0], zimSize, &w, &h, &f, &image)) {
 		if (w > info.w || h > info.h) {
 			ERROR_LOG(G3D, "Texture replacement changed since header read: %s", info.file.c_str());
@@ -857,7 +858,7 @@ void ReplacedTexture::PrepareData(int level) {
 			free(image);
 		}

-		CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, w, h);
+		CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, w, h, 0xFF000000);
 		if (res == CHECKALPHA_ANY || level == 0) {
 			alphaStatus_ = ReplacedTextureAlpha(res);
 		}
@@ -897,7 +898,7 @@ void ReplacedTexture::PrepareData(int level) {

 		if (!checkedAlpha) {
 			// This will only check the hashed bits.
-			CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, png.width, png.height);
+			CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, png.width, png.height, 0xFF000000);
 			if (res == CHECKALPHA_ANY || level == 0) {
 				alphaStatus_ = ReplacedTextureAlpha(res);
 			}
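CheckAlpha32Rect, used in both hunks above, is the rectangle-shaped wrapper: it AND-reduces h rows of w pixels at the given stride into one running sum. A scalar sketch that matches the inline definition added to the header later in this diff:

#include <cstdint>

typedef uint32_t u32;
enum CheckAlphaResult { CHECKALPHA_FULL, CHECKALPHA_ANY };

// Scalar stand-in for the diff's CheckMask32 (the real one has SSE/NEON fast paths).
static void CheckMask32(const u32 *src, int width, u32 *outMask) {
	u32 mask = 0xFFFFFFFF;
	for (int i = 0; i < width; i++)
		mask &= src[i];
	*outMask &= mask;
}

static CheckAlphaResult CheckAlpha32Rect(const u32 *pixelData, int stride, int width, int height, u32 fullAlphaMask) {
	u32 alphaSum = 0xFFFFFFFF;
	for (int y = 0; y < height; y++)
		CheckMask32(pixelData + stride * y, width, &alphaSum);
	return (fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}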
@@ -1430,118 +1430,6 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
 	return 0;
 }

 #ifdef _M_SSE
 inline u32 SSEReduce32And(__m128i value) {
 	// TODO: Should use a shuffle instead of srli, probably.
 	value = _mm_and_si128(value, _mm_srli_si128(value, 8));  // srli_si128 shifts by bytes: 8 bytes == 64 bits
 	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
 	return _mm_cvtsi128_si32(value);
 }
 inline u32 SSEReduce16And(__m128i value) {
 	// TODO: Should use a shuffle instead of srli, probably.
 	value = _mm_and_si128(value, _mm_srli_si128(value, 8));
 	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
 	value = _mm_and_si128(value, _mm_srli_si128(value, 2));
 	return _mm_cvtsi128_si32(value);
 }
 #endif

 #if PPSSPP_ARCH(ARM_NEON)
 inline u32 NEONReduce32And(uint32x4_t value) {
 	// TODO: Maybe a shuffle and a vector and, or something?
 	return vgetq_lane_u32(value, 0) & vgetq_lane_u32(value, 1) & vgetq_lane_u32(value, 2) & vgetq_lane_u32(value, 3);
 }
 #endif

 // TODO: SSE/SIMD
 // At least on x86, compiler actually SIMDs these pretty well.
 void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
 	u16 mask = 0xFFFF;
 	for (int i = 0; i < width; i++) {
 		u16 color = src[i];
 		mask &= color;
 		dst[i] = color;
 	}
 	*outMask &= (u32)mask;
 }

 // Used in video playback so nice to have being fast.
 void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
 	u32 mask = 0xFFFFFFFF;
 #ifdef _M_SSE
 	if (width >= 4) {
 		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
 		while (width >= 4) {
 			__m128i color = _mm_loadu_si128((__m128i *)src);
 			wideMask = _mm_and_si128(wideMask, color);
 			_mm_storeu_si128((__m128i *)dst, color);
 			src += 4;
 			dst += 4;
 			width -= 4;
 		}
 		mask = SSEReduce32And(wideMask);
 	}
 #elif PPSSPP_ARCH(ARM_NEON)
 	if (width >= 4) {
 		uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
 		while (width >= 4) {
 			uint32x4_t colors = vld1q_u32(src);
 			wideMask = vandq_u32(wideMask, colors);
 			vst1q_u32(dst, colors);
 			src += 4;
 			dst += 4;
 			width -= 4;
 		}
 		mask = NEONReduce32And(wideMask);
 	}
 #endif

 	for (int i = 0; i < width; i++) {
 		u32 color = src[i];
 		mask &= color;
 		dst[i] = color;
 	}
 	*outMask &= (u32)mask;
 }

 void CheckMask16(const u16 *src, int width, u32 *outMask) {
 	u16 mask = 0xFFFF;
 	for (int i = 0; i < width; i++) {
 		mask &= src[i];
 	}
 	*outMask &= (u32)mask;
 }

 void CheckMask32(const u32 *src, int width, u32 *outMask) {
 	u32 mask = 0xFFFFFFFF;
 #ifdef _M_SSE
 	if (width >= 4) {
 		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
 		while (width >= 4) {
 			wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
 			src += 4;
 			width -= 4;
 		}
 		mask = SSEReduce32And(wideMask);
 	}
 #elif PPSSPP_ARCH(ARM_NEON)
 	if (width >= 4) {
 		uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
 		while (width >= 4) {
 			wideMask = vandq_u32(wideMask, vld1q_u32(src));
 			src += 4;
 			width -= 4;
 		}
 		mask = NEONReduce32And(wideMask);
 	}
 #endif

 	for (int i = 0; i < width; i++) {
 		mask &= src[i];
 	}
 	*outMask &= (u32)mask;
 }

 CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
 	u32 alphaSum = 0xFFFFFFFF;
 	u32 fullAlphaMask = 0x0;
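CopyAndSumMask32 folds the alpha bookkeeping into a copy the decoder performs anyway, so the check is close to free. A hedged usage sketch (scalar stand-in for the real function; DecodeTextureLevel seeds alphaSum with all ones, as shown above):

#include <cstdint>

typedef uint32_t u32;

// Scalar stand-in for the diff's CopyAndSumMask32: copy a row while
// AND-ing every pixel into *outMask (an in/out accumulator).
static void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
	u32 mask = 0xFFFFFFFF;
	for (int i = 0; i < width; i++) {
		u32 color = src[i];
		mask &= color;
		dst[i] = color;
	}
	*outMask &= mask;
}

int main() {
	u32 src[4] = { 0xFF112233, 0xFF445566, 0x7F000000, 0xFFFFFFFF };
	u32 dst[4];
	u32 alphaSum = 0xFFFFFFFF;  // seeded all-ones, as in DecodeTextureLevel
	CopyAndSumMask32(dst, src, 4, &alphaSum);
	// The 0x7F alpha byte clears bit 31 of the running sum:
	bool fullAlpha = (alphaSum & 0xFF000000) == 0xFF000000;  // false here
	return fullAlpha ? 1 : 0;
}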
@@ -41,6 +41,13 @@
 #endif
 #endif

+#ifdef __clang__
+// Weird how you can't just use #pragma in a macro.
+#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
+#else
+#define DO_NOT_VECTORIZE_LOOP
+#endif
+
 #ifdef _M_SSE

 static u32 QuickTexHashSSE2(const void *checkp, u32 size) {
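The DO_NOT_VECTORIZE_LOOP macro added here is applied to the scalar tail loops later in this diff; presumably the point is that once the manual SSE/NEON blocks have consumed the 4- or 8-wide chunks, only a handful of elements remain, and letting clang auto-vectorize that remainder would just add setup overhead and code size. An illustration of the pattern (the helper function is hypothetical):

#include <cstdint>

#ifdef __clang__
#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
#else
#define DO_NOT_VECTORIZE_LOOP
#endif

// Hypothetical helper: AND-reduce a buffer; the manual SIMD body is elided.
uint32_t AndReduceTail(const uint32_t *src, int width) {
	uint32_t mask = 0xFFFFFFFF;
	// ... a manual 4-wide SIMD loop would consume width down to < 4 here ...
	DO_NOT_VECTORIZE_LOOP
	for (int i = 0; i < width; i++)  // at most 3 iterations remain
		mask &= src[i];
	return mask;
}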
@@ -647,417 +654,173 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
 }

 #ifdef _M_SSE
-static inline u32 CombineSSEBitsToDWORD(const __m128i &v) {
-	__m128i temp;
-	temp = _mm_or_si128(v, _mm_srli_si128(v, 8));
-	temp = _mm_or_si128(temp, _mm_srli_si128(temp, 4));
-	return _mm_cvtsi128_si32(temp);
+inline u32 SSEReduce32And(__m128i value) {
+	// TODO: Should use a shuffle instead of srli, probably.
+	value = _mm_and_si128(value, _mm_srli_si128(value, 8));  // srli_si128 shifts by bytes, not bits
+	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
+	return _mm_cvtsi128_si32(value);
 }

-CheckAlphaResult CheckAlphaRGBA8888SSE2(const u32 *pixelData, int stride, int w, int h) {
-	const __m128i mask = _mm_set1_epi32(0xFF000000);
-
-	const __m128i *p = (const __m128i *)pixelData;
-	const int w4 = w / 4;
-	const int stride4 = stride / 4;
-
-	__m128i bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w4; ++i) {
-			const __m128i a = _mm_load_si128(&p[i]);
-			bits = _mm_and_si128(bits, a);
-		}
-
-		__m128i result = _mm_xor_si128(bits, mask);
-		if (CombineSSEBitsToDWORD(result) != 0) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride4;
-	}
-
-	return CHECKALPHA_FULL;
+inline u32 SSEReduce16And(__m128i value) {
+	// TODO: Should use a shuffle instead of srli, probably.
+	value = _mm_and_si128(value, _mm_srli_si128(value, 8));
+	value = _mm_and_si128(value, _mm_srli_si128(value, 4));
+	u32 mask = _mm_cvtsi128_si32(value);
+	return mask & (mask >> 16);
 }

-CheckAlphaResult CheckAlphaABGR4444SSE2(const u32 *pixelData, int stride, int w, int h) {
-	const __m128i mask = _mm_set1_epi16((short)0x000F);
-
-	const __m128i *p = (const __m128i *)pixelData;
-	const int w8 = w / 8;
-	const int stride8 = stride / 8;
-
-	__m128i bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w8; ++i) {
-			const __m128i a = _mm_load_si128(&p[i]);
-			bits = _mm_and_si128(bits, a);
-		}
-
-		__m128i result = _mm_xor_si128(bits, mask);
-		if (CombineSSEBitsToDWORD(result) != 0) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride8;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w, int h) {
-	const __m128i mask = _mm_set1_epi16((short)0x0001);
-
-	const __m128i *p = (const __m128i *)pixelData;
-	const int w8 = w / 8;
-	const int stride8 = stride / 8;
-
-	__m128i bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w8; ++i) {
-			const __m128i a = _mm_load_si128(&p[i]);
-			bits = _mm_and_si128(bits, a);
-		}
-
-		__m128i result = _mm_xor_si128(bits, mask);
-		if (CombineSSEBitsToDWORD(result) != 0) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride8;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaRGBA4444SSE2(const u32 *pixelData, int stride, int w, int h) {
-	const __m128i mask = _mm_set1_epi16((short)0xF000);
-
-	const __m128i *p = (const __m128i *)pixelData;
-	const int w8 = w / 8;
-	const int stride8 = stride / 8;
-
-	__m128i bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w8; ++i) {
-			const __m128i a = _mm_load_si128(&p[i]);
-			bits = _mm_and_si128(bits, a);
-		}
-
-		__m128i result = _mm_xor_si128(bits, mask);
-		if (CombineSSEBitsToDWORD(result) != 0) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride8;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaRGBA5551SSE2(const u32 *pixelData, int stride, int w, int h) {
-	const __m128i mask = _mm_set1_epi16((short)0x8000);
-
-	const __m128i *p = (const __m128i *)pixelData;
-	const int w8 = w / 8;
-	const int stride8 = stride / 8;
-
-	__m128i bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w8; ++i) {
-			const __m128i a = _mm_load_si128(&p[i]);
-			bits = _mm_and_si128(bits, a);
-		}
-
-		__m128i result = _mm_xor_si128(bits, mask);
-		if (CombineSSEBitsToDWORD(result) != 0) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride8;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-#endif // _M_SSE
+#endif

 #if PPSSPP_ARCH(ARM_NEON)

-static inline bool VectorIsNonZeroNEON(const uint32x4_t &v) {
-	u64 low = vgetq_lane_u64(vreinterpretq_u64_u32(v), 0);
-	u64 high = vgetq_lane_u64(vreinterpretq_u64_u32(v), 1);
-
-	return (low | high) != 0;
+inline u32 NEONReduce32And(uint32x4_t value) {
+	// TODO: Maybe a shuffle and a vector and, or something?
+	return vgetq_lane_u32(value, 0) & vgetq_lane_u32(value, 1) & vgetq_lane_u32(value, 2) & vgetq_lane_u32(value, 3);
 }

-#ifndef _MSC_VER
-// MSVC consider this function the same as the one above! uint16x8_t is typedef'd to the same type as uint32x4_t.
-static inline bool VectorIsNonZeroNEON(const uint16x8_t &v) {
-	u64 low = vgetq_lane_u64(vreinterpretq_u64_u16(v), 0);
-	u64 high = vgetq_lane_u64(vreinterpretq_u64_u16(v), 1);
-
-	return (low | high) != 0;
+inline u32 NEONReduce16And(uint16x8_t value) {
+	uint32x4_t value32 = vreinterpretq_u32_u16(value);
+	// TODO: Maybe a shuffle and a vector and, or something?
+	u32 mask = vgetq_lane_u32(value32, 0) & vgetq_lane_u32(value32, 1) & vgetq_lane_u32(value32, 2) & vgetq_lane_u32(value32, 3);
+	return mask & (mask >> 16);
 }
-#endif

-CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h) {
-	const u32 *p = (const u32 *)pixelData;
-
-	const uint32x4_t mask = vdupq_n_u32(0xFF000000);
-	uint32x4_t bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w; i += 4) {
-			const uint32x4_t a = vld1q_u32(&p[i]);
-			bits = vandq_u32(bits, a);
-		}
-
-		uint32x4_t result = veorq_u32(bits, mask);
-		if (VectorIsNonZeroNEON(result)) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
-	const u16 *p = (const u16 *)pixelData;
-
-	const uint16x8_t mask = vdupq_n_u16((u16)0x000F);
-	uint16x8_t bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w; i += 8) {
-			const uint16x8_t a = vld1q_u16(&p[i]);
-			bits = vandq_u16(bits, a);
-		}
-
-		uint16x8_t result = veorq_u16(bits, mask);
-		if (VectorIsNonZeroNEON(result)) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h) {
-	const u16 *p = (const u16 *)pixelData;
-
-	const uint16x8_t mask = vdupq_n_u16((u16)0x0001);
-	uint16x8_t bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w; i += 8) {
-			const uint16x8_t a = vld1q_u16(&p[i]);
-			bits = vandq_u16(bits, a);
-		}
-
-		uint16x8_t result = veorq_u16(bits, mask);
-		if (VectorIsNonZeroNEON(result)) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h) {
-	const u16 *p = (const u16 *)pixelData;
-
-	const uint16x8_t mask = vdupq_n_u16((u16)0xF000);
-	uint16x8_t bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w; i += 8) {
-			const uint16x8_t a = vld1q_u16(&p[i]);
-			bits = vandq_u16(bits, a);
-		}
-
-		uint16x8_t result = veorq_u16(bits, mask);
-		if (VectorIsNonZeroNEON(result)) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h) {
-	const u16 *p = (const u16 *)pixelData;
-
-	const uint16x8_t mask = vdupq_n_u16((u16)0x8000);
-	uint16x8_t bits = mask;
-	for (int y = 0; y < h; ++y) {
-		for (int i = 0; i < w; i += 8) {
-			const uint16x8_t a = vld1q_u16(&p[i]);
-			bits = vandq_u16(bits, a);
-		}
-
-		uint16x8_t result = veorq_u16(bits, mask);
-		if (VectorIsNonZeroNEON(result)) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
 #endif

-CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w, int h) {
-	// Use SIMD if aligned to 16 bytes / 4 pixels (almost always the case.)
-	if ((w & 3) == 0 && (stride & 3) == 0) {
+// TODO: SSE/SIMD
+// At least on x86, compiler actually SIMDs these pretty well.
+void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
+	u16 mask = 0xFFFF;
 #ifdef _M_SSE
-		return CheckAlphaRGBA8888SSE2(pixelData, stride, w, h);
+	if (width >= 8) {
+		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
+		while (width >= 8) {
+			__m128i color = _mm_loadu_si128((__m128i *)src);
+			wideMask = _mm_and_si128(wideMask, color);
+			_mm_storeu_si128((__m128i *)dst, color);
+			src += 8;
+			dst += 8;
+			width -= 8;
+		}
+		mask = SSEReduce16And(wideMask);
+	}
 #elif PPSSPP_ARCH(ARM_NEON)
-		return CheckAlphaRGBA8888NEON(pixelData, stride, w, h);
+	if (width >= 8) {
+		uint16x8_t wideMask = vdupq_n_u16(0xFFFF);
+		while (width >= 8) {
+			uint16x8_t colors = vld1q_u16(src);
+			wideMask = vandq_u16(wideMask, colors);
+			vst1q_u16(dst, colors);
+			src += 8;
+			dst += 8;
+			width -= 8;
+		}
+		mask = NEONReduce16And(wideMask);
+	}
 #endif
-	}

+	DO_NOT_VECTORIZE_LOOP
+	for (int i = 0; i < width; i++) {
+		u16 color = src[i];
+		mask &= color;
+		dst[i] = color;
+	}

-	const u32 *p = pixelData;
-	for (int y = 0; y < h; ++y) {
-		u32 bits = 0xFF000000;
-		for (int i = 0; i < w; ++i) {
-			bits &= p[i];
-		}
-
-		if (bits != 0xFF000000) {
-			// We're done, we hit non-full alpha.
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride;
-	}
-
-	return CHECKALPHA_FULL;
+	*outMask &= (u32)mask;
 }

-CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h) {
-	// Use SIMD if aligned to 16 bytes / 8 pixels (usually the case.)
-	if ((w & 7) == 0 && (stride & 7) == 0) {
+// Used in video playback so nice to have being fast.
+void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
+	u32 mask = 0xFFFFFFFF;
 #ifdef _M_SSE
-		return CheckAlphaABGR4444SSE2(pixelData, stride, w, h);
+	if (width >= 4) {
+		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
+		while (width >= 4) {
+			__m128i color = _mm_loadu_si128((__m128i *)src);
+			wideMask = _mm_and_si128(wideMask, color);
+			_mm_storeu_si128((__m128i *)dst, color);
+			src += 4;
+			dst += 4;
+			width -= 4;
+		}
+		mask = SSEReduce32And(wideMask);
+	}
 #elif PPSSPP_ARCH(ARM_NEON)
-		return CheckAlphaABGR4444NEON(pixelData, stride, w, h);
+	if (width >= 4) {
+		uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
+		while (width >= 4) {
+			uint32x4_t colors = vld1q_u32(src);
+			wideMask = vandq_u32(wideMask, colors);
+			vst1q_u32(dst, colors);
+			src += 4;
+			dst += 4;
+			width -= 4;
+		}
+		mask = NEONReduce32And(wideMask);
+	}
 #endif
-	}

+	DO_NOT_VECTORIZE_LOOP
+	for (int i = 0; i < width; i++) {
+		u32 color = src[i];
+		mask &= color;
+		dst[i] = color;
+	}

-	const u32 *p = pixelData;
-	const int w2 = (w + 1) / 2;
-	const int stride2 = (stride + 1) / 2;
-
-	for (int y = 0; y < h; ++y) {
-		u32 bits = 0x000F000F;
-		for (int i = 0; i < w2; ++i) {
-			bits &= p[i];
-		}
-
-		if (bits != 0x000F000F) {
-			// We're done, we hit non-full alpha.
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride2;
-	}
-
-	return CHECKALPHA_FULL;
+	*outMask &= (u32)mask;
 }

-CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h) {
-	// Use SIMD if aligned to 16 bytes / 8 pixels (usually the case.)
-	if ((w & 7) == 0 && (stride & 7) == 0) {
+void CheckMask16(const u16 *src, int width, u32 *outMask) {
+	u16 mask = 0xFFFF;
 #ifdef _M_SSE
-		return CheckAlphaABGR1555SSE2(pixelData, stride, w, h);
+	if (width >= 8) {
+		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
+		while (width >= 8) {
+			wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
+			src += 8;
+			width -= 8;
+		}
+		mask = SSEReduce16And(wideMask);
+	}
 #elif PPSSPP_ARCH(ARM_NEON)
-		return CheckAlphaABGR1555NEON(pixelData, stride, w, h);
+	if (width >= 8) {
+		uint16x8_t wideMask = vdupq_n_u16(0xFFFF);
+		while (width >= 8) {
+			wideMask = vandq_u16(wideMask, vld1q_u16(src));
+			src += 8;
+			width -= 8;
+		}
+		mask = NEONReduce16And(wideMask);
+	}
 #endif
-	}

+	DO_NOT_VECTORIZE_LOOP
+	for (int i = 0; i < width; i++) {
+		mask &= src[i];
+	}

-	const u32 *p = pixelData;
-	const int w2 = (w + 1) / 2;
-	const int stride2 = (stride + 1) / 2;
-
-	for (int y = 0; y < h; ++y) {
-		u32 bits = 0x00010001;
-		for (int i = 0; i < w2; ++i) {
-			bits &= p[i];
-		}
-
-		if (bits != 0x00010001) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride2;
-	}
-
-	return CHECKALPHA_FULL;
+	*outMask &= (u32)mask;
 }

-CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h) {
-	// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
-	if ((w & 7) == 0 && (stride & 7) == 0) {
+void CheckMask32(const u32 *src, int width, u32 *outMask) {
+	u32 mask = 0xFFFFFFFF;
 #ifdef _M_SSE
-		return CheckAlphaRGBA4444SSE2(pixelData, stride, w, h);
+	if (width >= 4) {
+		__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
+		while (width >= 4) {
+			wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
+			src += 4;
+			width -= 4;
+		}
+		mask = SSEReduce32And(wideMask);
+	}
 #elif PPSSPP_ARCH(ARM_NEON)
-		return CheckAlphaRGBA4444NEON(pixelData, stride, w, h);
+	if (width >= 4) {
+		uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
+		while (width >= 4) {
+			wideMask = vandq_u32(wideMask, vld1q_u32(src));
+			src += 4;
+			width -= 4;
+		}
+		mask = NEONReduce32And(wideMask);
+	}
 #endif
-	}

+	DO_NOT_VECTORIZE_LOOP
+	for (int i = 0; i < width; i++) {
+		mask &= src[i];
+	}

-	const u32 *p = pixelData;
-	const int w2 = (w + 1) / 2;
-	const int stride2 = (stride + 1) / 2;
-
-	for (int y = 0; y < h; ++y) {
-		u32 bits = 0xF000F000;
-		for (int i = 0; i < w2; ++i) {
-			bits &= p[i];
-		}
-
-		if (bits != 0xF000F000) {
-			// We're done, we hit non-full alpha.
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride2;
-	}
-
-	return CHECKALPHA_FULL;
-}
-
-CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
-	// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
-	if ((w & 7) == 0 && (stride & 7) == 0) {
-#ifdef _M_SSE
-		return CheckAlphaRGBA5551SSE2(pixelData, stride, w, h);
-#elif PPSSPP_ARCH(ARM_NEON)
-		return CheckAlphaRGBA5551NEON(pixelData, stride, w, h);
-#endif
-	}
-
-	const u32 *p = pixelData;
-	const int w2 = (w + 1) / 2;
-	const int stride2 = (stride + 1) / 2;
-
-	for (int y = 0; y < h; ++y) {
-		u32 bits = 0x80008000;
-		for (int i = 0; i < w2; ++i) {
-			bits &= p[i];
-		}
-
-		if (bits != 0x80008000) {
-			return CHECKALPHA_ANY;
-		}
-
-		p += stride2;
-	}
-
-	return CHECKALPHA_FULL;
+	*outMask &= (u32)mask;
 }

@@ -38,11 +38,11 @@ void DoUnswizzleTex16(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch

 u32 StableQuickTexHash(const void *checkp, u32 size);

-CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w, int h);
-CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h);
-CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h);
-CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h);
-CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h);
+// outMask is an in/out parameter.
+void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask);
+void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask);
+void CheckMask16(const u16 *src, int width, u32 *outMask);
+void CheckMask32(const u32 *src, int width, u32 *outMask);

 // All these DXT structs are in the reverse order, as compared to PC.
 // On PC, alpha comes before color, and interpolants are before the tile data.
@@ -98,6 +98,26 @@ inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) {
 	return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask;
 }

+inline CheckAlphaResult CheckAlpha16(const u16 *pixelData, int width, u32 fullAlphaMask) {
+	u32 alphaSum = 0xFFFFFFFF;
+	CheckMask16(pixelData, width, &alphaSum);
+	return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
+}
+
+inline CheckAlphaResult CheckAlpha32(const u32 *pixelData, int width, u32 fullAlphaMask) {
+	u32 alphaSum = 0xFFFFFFFF;
+	CheckMask32(pixelData, width, &alphaSum);
+	return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
+}
+
+inline CheckAlphaResult CheckAlpha32Rect(const u32 *pixelData, int stride, int width, int height, u32 fullAlphaMask) {
+	u32 alphaSum = 0xFFFFFFFF;
+	for (int y = 0; y < height; y++) {
+		CheckMask32(pixelData + stride * y, width, &alphaSum);
+	}
+	return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
+}
+
 template <typename IndexT, typename ClutT>
 inline void DeIndexTexture(/*WRITEONLY*/ ClutT *dest, const IndexT *indexed, int length, const ClutT *clut, u32 *outAlphaSum) {
 	// Usually, there is no special offset, mask, or shift.
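For the 16-bit formats, all the per-format knowledge now lives in the fullAlphaMask argument: 0xF000 (4444 with alpha on top), 0x8000 (5551), and the mirrored 0x000F and 0x0001 for the GLES variants. The fullAlphaMask != 0 guard in AlphaSumIsFull makes a zero mask report CHECKALPHA_ANY, presumably as a conservative default for paths where the mask never got set; formats with no alpha at all (565) are short-circuited by the dispatchers below instead. A standalone check of the 4444 case (the data values are made up):

#include <cstdint>

typedef uint16_t u16;
typedef uint32_t u32;

// Scalar stand-ins for the real helpers above.
static void CheckMask16(const u16 *src, int width, u32 *outMask) {
	u16 mask = 0xFFFF;
	for (int i = 0; i < width; i++)
		mask &= src[i];
	*outMask &= (u32)mask;
}

static bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) {
	return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask;
}

int main() {
	// RGBA4444 with alpha in the top nibble (fullAlphaMask 0xF000, as in the diff).
	const u16 opaque[2] = { 0xF123, 0xFABC };  // alpha nibble = 0xF everywhere
	const u16 holed[2]  = { 0xF123, 0x7ABC };  // one pixel with alpha 0x7
	u32 sum = 0xFFFFFFFF;
	CheckMask16(opaque, 2, &sum);
	bool full1 = AlphaSumIsFull(sum, 0xF000);  // true
	sum = 0xFFFFFFFF;
	CheckMask16(holed, 2, &sum);
	bool full2 = AlphaSumIsFull(sum, 0xF000);  // false
	return (full1 && !full2) ? 0 : 1;
}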
@@ -418,8 +418,8 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
 	} else {
 		gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
 		framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
@@ -591,25 +591,18 @@ DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFo
 	}
 }

-TexCacheEntry::TexStatus TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
-	CheckAlphaResult res;
+CheckAlphaResult TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
 	switch (dstFmt) {
 	case DXGI_FORMAT_B4G4R4A4_UNORM:
-		res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
 	case DXGI_FORMAT_B5G5R5A1_UNORM:
-		res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
 	case DXGI_FORMAT_B5G6R5_UNORM:
 		// Never has any alpha.
-		res = CHECKALPHA_FULL;
-		break;
+		return CHECKALPHA_FULL;
 	default:
-		res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000);
 	}
-
-	return (TexCacheEntry::TexStatus)res;
 }

 ReplacedTextureFormat FromD3D11Format(u32 fmt) {
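After this change, all four backends' CheckAlpha dispatchers collapse to the same shape; only the format enum and the bit order of the masks differ. The stride/h parameters could also be dropped, since the remaining callers check a CLUT buffer, which is a single contiguous run of clutTotalColors entries. A condensed, self-contained sketch of the shared shape (the enum and the helper bodies here are scalar stand-ins, not the real ones):

#include <cstdint>

typedef uint16_t u16;
typedef uint32_t u32;
enum CheckAlphaResult { CHECKALPHA_FULL, CHECKALPHA_ANY };

// Scalar stand-ins for the helpers declared in the header diff above.
static CheckAlphaResult CheckAlpha16(const u16 *px, int w, u32 fullAlphaMask) {
	u16 mask = 0xFFFF;
	for (int i = 0; i < w; i++) mask &= px[i];
	u32 sum = 0xFFFFFFFF & (u32)mask;
	return (fullAlphaMask != 0 && (sum & fullAlphaMask) == fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}
static CheckAlphaResult CheckAlpha32(const u32 *px, int w, u32 fullAlphaMask) {
	u32 sum = 0xFFFFFFFF;
	for (int i = 0; i < w; i++) sum &= px[i];
	return (fullAlphaMask != 0 && (sum & fullAlphaMask) == fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}

enum DstFmt { FMT_4444, FMT_5551, FMT_565, FMT_8888 };  // placeholder for the backend format enums

CheckAlphaResult CheckAlpha(const u32 *pixelData, DstFmt dstFmt, int w) {
	switch (dstFmt) {
	case FMT_4444: return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
	case FMT_5551: return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
	case FMT_565:  return CHECKALPHA_FULL;  // never has any alpha
	default:       return CheckAlpha32(pixelData, w, 0xFF000000);
	}
}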
@@ -70,7 +70,7 @@ protected:
 private:
 	void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, DXGI_FORMAT dstFmt);
 	DXGI_FORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
-	static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
+	static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
 	void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

 	void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;
@@ -375,8 +375,8 @@ void TextureCacheDX9::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, G
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
 	} else {
 		framebufferManagerDX9_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
@@ -525,25 +525,18 @@ D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat
 	}
 }

-TexCacheEntry::TexStatus TextureCacheDX9::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
-	CheckAlphaResult res;
+CheckAlphaResult TextureCacheDX9::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
 	switch (dstFmt) {
 	case D3DFMT_A4R4G4B4:
-		res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
 	case D3DFMT_A1R5G5B5:
-		res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
 	case D3DFMT_R5G6B5:
 		// Never has any alpha.
-		res = CHECKALPHA_FULL;
-		break;
+		return CHECKALPHA_FULL;
 	default:
-		res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha32(pixelData, w, 0xFF000000);
 	}
-
-	return (TexCacheEntry::TexStatus)res;
 }

 ReplacedTextureFormat FromD3D9Format(u32 fmt) {
@@ -64,7 +64,7 @@ private:

 	void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, u32 dstFmt);
 	D3DFORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
-	static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
+	static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
 	void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

 	void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;
@@ -373,8 +373,8 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
 		gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
 		return;
 	}

@@ -407,8 +407,8 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
 	} else {
 		framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);

@@ -614,25 +614,18 @@ Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPalet
 	}
 }

-TexCacheEntry::TexStatus TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h) {
-	CheckAlphaResult res;
+CheckAlphaResult TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int w) {
 	switch (dstFmt) {
 	case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
-		res = CheckAlphaABGR4444Basic((const uint32_t *)pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0x000F);
 	case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
-		res = CheckAlphaABGR1555Basic((const uint32_t *)pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0x0001);
 	case Draw::DataFormat::R5G6B5_UNORM_PACK16:
 		// Never has any alpha.
-		res = CHECKALPHA_FULL;
-		break;
+		return CHECKALPHA_FULL;
 	default:
-		res = CheckAlphaRGBA8888Basic((const uint32_t *)pixelData, stride, w, h);
-		break;
+		return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000);  // note, the normal order here, unlike the 16-bit formats
 	}
-
-	return (TexCacheEntry::TexStatus)res;
 }

 void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt) {
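Note the GLES masks are mirrored relative to the D3D ones: in Draw's R4G4B4A4_UNORM_PACK16 and R5G5B5A1_UNORM_PACK16, the alpha component sits in the low bits of each 16-bit pixel, hence 0x000F and 0x0001 rather than 0xF000 and 0x8000, while the default 8888 case uses the same 0xFF000000 everywhere (the "normal order" the comment points out). A quick self-check of the two 4444 layouts (the values are illustrative):

#include <cstdint>

int main() {
	// R4G4B4A4_UNORM_PACK16: R[15:12] G[11:8] B[7:4] A[3:0] -> alpha mask 0x000F.
	uint16_t rgba4444 = (uint16_t)((0xF << 12) | (0x0 << 8) | (0x0 << 4) | 0xA);
	// DXGI B4G4R4A4 (alpha ends up in the top nibble on little-endian) -> alpha mask 0xF000.
	uint16_t bgra4444 = (uint16_t)((0xA << 12) | 0x00F);
	bool ok = (rgba4444 & 0x000F) == 0x000A && (bgra4444 & 0xF000) == 0xA000;
	return ok ? 0 : 1;
}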
@@ -73,7 +73,7 @@ private:
 	void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt);
 	Draw::DataFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;

-	static TexCacheEntry::TexStatus CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h);
+	static CheckAlphaResult CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int w);
 	void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;
 	void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;
@@ -428,8 +428,8 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
 		gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
 		curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey);
 		if (framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET)) {
 			imageView_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE0_IMAGEVIEW);
@@ -532,8 +532,8 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

-		TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1);
-		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
+		CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors);
+		gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);

 		framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
 		draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
@@ -936,25 +936,18 @@ VkFormat TextureCacheVulkan::GetDestFormat(GETextureFormat format, GEPaletteForm
 	}
 }

-TexCacheEntry::TexStatus TextureCacheVulkan::CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int stride, int w, int h) {
-	CheckAlphaResult res;
+CheckAlphaResult TextureCacheVulkan::CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w) {
 	switch (dstFmt) {
 	case VULKAN_4444_FORMAT:
-		res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
 	case VULKAN_1555_FORMAT:
-		res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
 	case VULKAN_565_FORMAT:
 		// Never has any alpha.
-		res = CHECKALPHA_FULL;
-		break;
+		return CHECKALPHA_FULL;
 	default:
-		res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
-		break;
+		return CheckAlpha32(pixelData, w, 0xFF000000);
 	}
-
-	return (TexCacheEntry::TexStatus)res;
 }

 void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt) {
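A side benefit visible in all four backends: returning CheckAlphaResult directly removes the old "return (TexCacheEntry::TexStatus)res;" cast, which only worked because the two enums' values happened to line up. A small illustration of why such casts are fragile (both enums and their values here are made up, not the real ones):

#include <cassert>

enum CheckAlphaResult { CHECKALPHA_FULL = 0, CHECKALPHA_ANY = 1 };   // illustrative values
enum TexStatus { STATUS_ALPHA_FULL = 0, STATUS_ALPHA_UNKNOWN = 4 };  // made-up second enum

int main() {
	CheckAlphaResult res = CHECKALPHA_ANY;
	TexStatus st = (TexStatus)res;    // compiles, but only "works" if the values happen to match
	assert(st != STATUS_ALPHA_FULL);  // nothing enforces this correspondence at compile time
	return 0;
}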
@@ -107,7 +107,7 @@ protected:
 private:
 	void LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt);
 	VkFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
-	static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int stride, int w, int h);
+	static CheckAlphaResult CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w);
 	void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

 	void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;