mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-25 16:41:04 +00:00
Merge pull request #10108 from unknownbrackets/vulkan-scaling
Remove texture checks for simple alpha
This commit is contained in:
commit
a1b3a4384c
@ -561,8 +561,6 @@ void ReplacedTexture::Load(int level, void *out, int rowPitch) {
|
||||
CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)out, rowPitch / sizeof(u32), png.width, png.height);
|
||||
if (res == CHECKALPHA_ANY || level == 0) {
|
||||
alphaStatus_ = ReplacedTextureAlpha(res);
|
||||
} else if (res == CHECKALPHA_ZERO && alphaStatus_ == ReplacedTextureAlpha::FULL) {
|
||||
alphaStatus_ = ReplacedTextureAlpha(res);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1467,7 +1467,6 @@ void TextureCacheCommon::ApplyTexture() {
|
||||
} else {
|
||||
BindTexture(entry);
|
||||
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,8 +99,9 @@ struct TexCacheEntry {
|
||||
|
||||
STATUS_ALPHA_UNKNOWN = 0x04,
|
||||
STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.
|
||||
STATUS_ALPHA_SIMPLE = 0x08, // Like above, but also has 0 alpha (e.g. 5551.)
|
||||
STATUS_ALPHA_MASK = 0x0c,
|
||||
STATUS_ALPHA_MASK = 0x04,
|
||||
|
||||
// 0x08 free.
|
||||
|
||||
STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 6 frames in between.)
|
||||
STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.
|
||||
@ -166,8 +167,6 @@ struct TexCacheEntry {
|
||||
// For non-level zero, only set more restrictive.
|
||||
if (newStatus == STATUS_ALPHA_UNKNOWN || level == 0) {
|
||||
SetAlphaStatus(newStatus);
|
||||
} else if (newStatus == STATUS_ALPHA_SIMPLE && GetAlphaStatus() == STATUS_ALPHA_FULL) {
|
||||
SetAlphaStatus(STATUS_ALPHA_SIMPLE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -438,92 +438,32 @@ static inline u32 CombineSSEBitsToDWORD(const __m128i &v) {
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA8888SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i full = _mm_set1_epi32(0xFF);
|
||||
const __m128i mask = _mm_set1_epi32(0xFF000000);
|
||||
|
||||
const __m128i *p = (const __m128i *)pixelData;
|
||||
const int w4 = w / 4;
|
||||
const int stride4 = stride / 4;
|
||||
|
||||
// Have alpha values == 0 been seen?
|
||||
__m128i hasZeroCursor = _mm_setzero_si128();
|
||||
__m128i bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
// Have alpha values > 0 and < 0xFF been seen?
|
||||
__m128i hasAnyCursor = _mm_setzero_si128();
|
||||
|
||||
for (int i = 0; i < w4; ++i) {
|
||||
const __m128i a = _mm_srli_epi32(_mm_load_si128(&p[i]), 24);
|
||||
|
||||
const __m128i isZero = _mm_cmpeq_epi32(a, zero);
|
||||
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
|
||||
|
||||
// If a = FF, isNotFull will be 0 -> hasAny will be 0.
|
||||
// If a = 00, a & isNotFull will be 0 -> hasAny will be 0.
|
||||
// In any other case, hasAny will have some bits set.
|
||||
const __m128i isNotFull = _mm_cmplt_epi32(a, full);
|
||||
hasAnyCursor = _mm_or_si128(hasAnyCursor, _mm_and_si128(a, isNotFull));
|
||||
const __m128i a = _mm_load_si128(&p[i]);
|
||||
bits = _mm_and_si128(bits, a);
|
||||
}
|
||||
p += stride4;
|
||||
|
||||
// We check any early, in case we can skip the rest of the rows.
|
||||
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
|
||||
__m128i result = _mm_xor_si128(bits, mask);
|
||||
if (CombineSSEBitsToDWORD(result) != 0) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride4;
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR4444SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i full = _mm_set1_epi16((short)0xF000);
|
||||
|
||||
const __m128i *p = (const __m128i *)pixelData;
|
||||
const int w8 = w / 8;
|
||||
const int stride8 = stride / 8;
|
||||
|
||||
__m128i hasZeroCursor = _mm_setzero_si128();
|
||||
for (int y = 0; y < h; ++y) {
|
||||
__m128i hasAnyCursor = _mm_setzero_si128();
|
||||
|
||||
for (int i = 0; i < w8; ++i) {
|
||||
// This moves XXXA to A000.
|
||||
const __m128i a = _mm_slli_epi16(_mm_load_si128(&p[i]), 12);
|
||||
|
||||
// At least one bit in isZero, and therefore hasZeroCursor, will get set if there's a zero.
|
||||
const __m128i isZero = _mm_cmpeq_epi16(a, zero);
|
||||
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
|
||||
|
||||
// If a = F, isFull will be 1 -> hasAny will be 0.
|
||||
// If a = 0, a & !isFull will be 0 -> hasAny will be 0.
|
||||
// In any other case, hasAny will have some bits set.
|
||||
const __m128i isFull = _mm_cmpeq_epi32(a, full);
|
||||
const __m128i hasAny = _mm_andnot_si128(isFull, a);
|
||||
hasAnyCursor = _mm_or_si128(hasAnyCursor, hasAny);
|
||||
}
|
||||
p += stride8;
|
||||
|
||||
// We check any early, in case we can skip the rest of the rows.
|
||||
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
const __m128i mask = _mm_set1_epi16(1);
|
||||
const __m128i mask = _mm_set1_epi16((short)0x000F);
|
||||
|
||||
const __m128i *p = (const __m128i *)pixelData;
|
||||
const int w8 = w / 8;
|
||||
@ -538,7 +478,32 @@ CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w,
|
||||
|
||||
__m128i result = _mm_xor_si128(bits, mask);
|
||||
if (CombineSSEBitsToDWORD(result) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride8;
|
||||
}
|
||||
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
const __m128i mask = _mm_set1_epi16((short)0x0001);
|
||||
|
||||
const __m128i *p = (const __m128i *)pixelData;
|
||||
const int w8 = w / 8;
|
||||
const int stride8 = stride / 8;
|
||||
|
||||
__m128i bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w8; ++i) {
|
||||
const __m128i a = _mm_load_si128(&p[i]);
|
||||
bits = _mm_and_si128(bits, a);
|
||||
}
|
||||
|
||||
__m128i result = _mm_xor_si128(bits, mask);
|
||||
if (CombineSSEBitsToDWORD(result) != 0) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride8;
|
||||
@ -548,43 +513,28 @@ CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w,
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA4444SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i full = _mm_set1_epi16(0x000F);
|
||||
const __m128i mask = _mm_set1_epi16((short)0xF000);
|
||||
|
||||
const __m128i *p = (const __m128i *)pixelData;
|
||||
const int w8 = w / 8;
|
||||
const int stride8 = stride / 8;
|
||||
|
||||
__m128i hasZeroCursor = _mm_setzero_si128();
|
||||
__m128i bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
__m128i hasAnyCursor = _mm_setzero_si128();
|
||||
|
||||
for (int i = 0; i < w8; ++i) {
|
||||
const __m128i a = _mm_srli_epi16(_mm_load_si128(&p[i]), 12);
|
||||
|
||||
const __m128i isZero = _mm_cmpeq_epi16(a, zero);
|
||||
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
|
||||
|
||||
// If a = F, isNotFull will be 0 -> hasAny will be 0.
|
||||
// If a = 0, a & isNotFull will be 0 -> hasAny will be 0.
|
||||
// In any other case, hasAny will have some bits set.
|
||||
const __m128i isNotFull = _mm_cmplt_epi32(a, full);
|
||||
hasAnyCursor = _mm_or_si128(hasAnyCursor, _mm_and_si128(a, isNotFull));
|
||||
const __m128i a = _mm_load_si128(&p[i]);
|
||||
bits = _mm_and_si128(bits, a);
|
||||
}
|
||||
p += stride8;
|
||||
|
||||
// We check any early, in case we can skip the rest of the rows.
|
||||
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
|
||||
__m128i result = _mm_xor_si128(bits, mask);
|
||||
if (CombineSSEBitsToDWORD(result) != 0) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride8;
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA5551SSE2(const u32 *pixelData, int stride, int w, int h) {
|
||||
@ -603,7 +553,7 @@ CheckAlphaResult CheckAlphaRGBA5551SSE2(const u32 *pixelData, int stride, int w,
|
||||
|
||||
__m128i result = _mm_xor_si128(bits, mask);
|
||||
if (CombineSSEBitsToDWORD(result) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride8;
|
||||
@ -625,26 +575,22 @@ CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 hitZeroAlpha = 0;
|
||||
|
||||
const u32 *p = pixelData;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
u32 bits = 0xFF000000;
|
||||
for (int i = 0; i < w; ++i) {
|
||||
u32 a = p[i] & 0xFF000000;
|
||||
hitZeroAlpha |= a ^ 0xFF000000;
|
||||
if (a != 0xFF000000 && a != 0) {
|
||||
// We're done, we hit non-zero, non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
bits &= p[i];
|
||||
}
|
||||
|
||||
if (bits != 0xFF000000) {
|
||||
// We're done, we hit non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
if (hitZeroAlpha) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h) {
|
||||
@ -659,29 +605,25 @@ CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 hitZeroAlpha = 0;
|
||||
|
||||
const u32 *p = pixelData;
|
||||
const int w2 = (w + 1) / 2;
|
||||
const int stride2 = (stride + 1) / 2;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
u32 bits = 0x000F000F;
|
||||
for (int i = 0; i < w2; ++i) {
|
||||
u32 a = p[i] & 0x000F000F;
|
||||
hitZeroAlpha |= a ^ 0x000F000F;
|
||||
if (a != 0x000F000F && a != 0x0000000F && a != 0x000F0000 && a != 0) {
|
||||
// We're done, we hit non-zero, non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
bits &= p[i];
|
||||
}
|
||||
|
||||
if (bits != 0x000F000F) {
|
||||
// We're done, we hit non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride2;
|
||||
}
|
||||
|
||||
if (hitZeroAlpha) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h) {
|
||||
@ -700,14 +642,14 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
|
||||
const int w2 = (w + 1) / 2;
|
||||
const int stride2 = (stride + 1) / 2;
|
||||
|
||||
u32 bits = 0x00010001;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
u32 bits = 0x00010001;
|
||||
for (int i = 0; i < w2; ++i) {
|
||||
bits &= p[i];
|
||||
}
|
||||
|
||||
if ((bits ^ 0x00010001) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
if (bits != 0x00010001) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride2;
|
||||
@ -717,59 +659,62 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h) {
|
||||
#ifdef _M_SSE
|
||||
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
|
||||
if ((w & 7) == 0 && (stride & 7) == 0) {
|
||||
#ifdef _M_SSE
|
||||
return CheckAlphaRGBA4444SSE2(pixelData, stride, w, h);
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 hitZeroAlpha = 0;
|
||||
|
||||
const u32 *p = pixelData;
|
||||
const int w2 = (w + 1) / 2;
|
||||
const int stride2 = (stride + 1) / 2;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w2; ++i) {
|
||||
u32 a = p[i] & 0xF000F000;
|
||||
hitZeroAlpha |= a ^ 0xF000F000;
|
||||
if (a != 0xF000F000 && a != 0xF0000000 && a != 0x0000F000 && a != 0) {
|
||||
// We're done, we hit non-zero, non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARMV7) || PPSSPP_ARCH(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
return CheckAlphaRGBA4444NEON(pixelData, stride, w, h);
|
||||
}
|
||||
p += stride2;
|
||||
}
|
||||
|
||||
if (hitZeroAlpha) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
|
||||
#ifdef _M_SSE
|
||||
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
|
||||
if ((w & 7) == 0 && (stride & 7) == 0) {
|
||||
return CheckAlphaRGBA5551SSE2(pixelData, stride, w, h);
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 bits = 0x80008000;
|
||||
}
|
||||
|
||||
const u32 *p = pixelData;
|
||||
const int w2 = (w + 1) / 2;
|
||||
const int stride2 = (stride + 1) / 2;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
u32 bits = 0xF000F000;
|
||||
for (int i = 0; i < w2; ++i) {
|
||||
bits &= p[i];
|
||||
}
|
||||
|
||||
if ((bits ^ 0x80008000) != 0) {
|
||||
return CHECKALPHA_ZERO;
|
||||
if (bits != 0xF000F000) {
|
||||
// We're done, we hit non-full alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride2;
|
||||
}
|
||||
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
|
||||
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
|
||||
if ((w & 7) == 0 && (stride & 7) == 0) {
|
||||
#ifdef _M_SSE
|
||||
return CheckAlphaRGBA5551SSE2(pixelData, stride, w, h);
|
||||
#elif PPSSPP_ARCH(ARMV7) || PPSSPP_ARCH(ARM64)
|
||||
if (cpu_info.bNEON) {
|
||||
return CheckAlphaRGBA5551NEON(pixelData, stride, w, h);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
const u32 *p = pixelData;
|
||||
const int w2 = (w + 1) / 2;
|
||||
const int stride2 = (stride + 1) / 2;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
u32 bits = 0x80008000;
|
||||
for (int i = 0; i < w2; ++i) {
|
||||
bits &= p[i];
|
||||
}
|
||||
|
||||
if (bits != 0x80008000) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride2;
|
||||
|
@ -21,7 +21,6 @@ enum CheckAlphaResult {
|
||||
// These are intended to line up with TexCacheEntry::STATUS_ALPHA_UNKNOWN, etc.
|
||||
CHECKALPHA_FULL = 0,
|
||||
CHECKALPHA_ANY = 4,
|
||||
CHECKALPHA_ZERO = 8,
|
||||
};
|
||||
|
||||
#include "Common/Common.h"
|
||||
|
@ -277,102 +277,107 @@ static inline bool VectorIsNonZeroNEON(const uint16x8_t &v) {
|
||||
#endif
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const uint32x4_t zero = vdupq_n_u32(0);
|
||||
const uint32x4_t full = vdupq_n_u32(0xFF);
|
||||
|
||||
const u32 *p = (const u32 *)pixelData;
|
||||
|
||||
// Have alpha values == 0 been seen?
|
||||
uint32x4_t foundAZero = zero;
|
||||
|
||||
const uint32x4_t mask = vdupq_n_u32(0xFF000000);
|
||||
uint32x4_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
// Have alpha values > 0 and < 0xFF been seen?
|
||||
uint32x4_t foundFraction = zero;
|
||||
|
||||
for (int i = 0; i < w; i += 4) {
|
||||
const uint32x4_t a = vshrq_n_u32(vld1q_u32(&p[i]), 24);
|
||||
|
||||
const uint32x4_t isZero = vceqq_u32(a, zero);
|
||||
foundAZero = vorrq_u32(foundAZero, isZero);
|
||||
|
||||
// If a = FF, isNotFull will be 0 -> foundFraction will be 0.
|
||||
// If a = 00, a & isNotFull will be 0 -> foundFraction will be 0.
|
||||
// In any other case, foundFraction will have some bits set.
|
||||
const uint32x4_t isNotFull = vcltq_u32(a, full);
|
||||
foundFraction = vorrq_u32(foundFraction, vandq_u32(a, isNotFull));
|
||||
const uint32x4_t a = vld1q_u32(&p[i]);
|
||||
bits = vandq_u32(bits, a);
|
||||
}
|
||||
p += stride;
|
||||
|
||||
// We check any early, in case we can skip the rest of the rows.
|
||||
if (VectorIsNonZeroNEON(foundFraction)) {
|
||||
uint32x4_t result = veorq_u32(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (VectorIsNonZeroNEON(foundAZero)) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const uint16x8_t zero = vdupq_n_u16(0);
|
||||
const uint16x8_t full = vdupq_n_u16(0xF000);
|
||||
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
// Have alpha values == 0 been seen?
|
||||
uint16x8_t foundAZero = zero;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
// Have alpha values > 0 and < 0xFF been seen?
|
||||
uint16x8_t foundFraction = zero;
|
||||
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vshlq_n_u16(vld1q_u16(&p[i]), 12);
|
||||
|
||||
const uint16x8_t isZero = vceqq_u16(a, zero);
|
||||
foundAZero = vorrq_u16(foundAZero, isZero);
|
||||
|
||||
// If a = F, isNotFull will be 0 -> foundFraction will be 0.
|
||||
// If a = 0, a & isNotFull will be 0 -> foundFraction will be 0.
|
||||
// In any other case, foundFraction will have some bits set.
|
||||
const uint16x8_t isNotFull = vcltq_u16(a, full);
|
||||
foundFraction = vorrq_u16(foundFraction, vandq_u16(a, isNotFull));
|
||||
}
|
||||
p += stride;
|
||||
|
||||
// We check any early, in case we can skip the rest of the rows.
|
||||
if (VectorIsNonZeroNEON(foundFraction)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (VectorIsNonZeroNEON(foundAZero)) {
|
||||
return CHECKALPHA_ZERO;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16(1);
|
||||
uint16x8_t bits = vdupq_n_u16(1);
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0x000F);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
|
||||
uint16x8_t result = veorq_u16(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ZERO;
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0x0001);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
|
||||
uint16x8_t result = veorq_u16(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0xF000);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
|
||||
uint16x8_t result = veorq_u16(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0x8000);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
|
||||
uint16x8_t result = veorq_u16(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
|
@ -24,3 +24,5 @@ u32 ReliableHash32NEON(const void *input, size_t len, u32 seed);
|
||||
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h);
|
||||
|
@ -447,14 +447,12 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra
|
||||
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1);
|
||||
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE);
|
||||
} else {
|
||||
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
|
||||
framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
|
||||
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
|
||||
gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha);
|
||||
framebufferManagerD3D11_->RebindFramebuffer(); // Probably not necessary.
|
||||
}
|
||||
SamplerCacheKey samplerKey;
|
||||
@ -759,6 +757,14 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &
|
||||
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
|
||||
DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, expand32);
|
||||
|
||||
// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
u32 scaleFmt = (u32)dstFmt;
|
||||
scaler.ScaleAlways((u32 *)mapData, pixelData, scaleFmt, w, h, scaleFactor);
|
||||
@ -780,13 +786,6 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &
|
||||
}
|
||||
}
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (replacer_.Enabled()) {
|
||||
ReplacedTextureDecodeInfo replacedInfo;
|
||||
replacedInfo.cachekey = entry.CacheKey();
|
||||
|
@ -453,14 +453,12 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame
|
||||
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
|
||||
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE);
|
||||
} else {
|
||||
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
|
||||
framebufferManagerDX9_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
|
||||
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
|
||||
gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha);
|
||||
}
|
||||
|
||||
framebufferManagerDX9_->RebindFramebuffer();
|
||||
@ -733,6 +731,14 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &re
|
||||
|
||||
DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, false);
|
||||
|
||||
// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
scaler.ScaleAlways((u32 *)rect.pBits, pixelData, dstFmt, w, h, scaleFactor);
|
||||
pixelData = (u32 *)rect.pBits;
|
||||
@ -752,13 +758,6 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &re
|
||||
}
|
||||
}
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (replacer_.Enabled()) {
|
||||
ReplacedTextureDecodeInfo replacedInfo;
|
||||
replacedInfo.cachekey = entry.CacheKey();
|
||||
|
@ -508,14 +508,12 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram
|
||||
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
|
||||
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE);
|
||||
} else {
|
||||
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
|
||||
framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
|
||||
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
|
||||
gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha);
|
||||
}
|
||||
|
||||
framebufferManagerGL_->RebindFramebuffer();
|
||||
@ -869,11 +867,9 @@ void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &r
|
||||
|
||||
// Textures are always aligned to 16 bytes bufw, so this could safely be 4 always.
|
||||
texByteAlign = dstFmt == GL_UNSIGNED_BYTE ? 4 : 2;
|
||||
|
||||
pixelData = (u32 *)finalBuf;
|
||||
if (scaleFactor > 1)
|
||||
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
|
||||
|
||||
// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, useUnpack ? bufw : w, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
@ -881,6 +877,9 @@ void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &r
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (scaleFactor > 1)
|
||||
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
|
||||
|
||||
if (replacer_.Enabled()) {
|
||||
ReplacedTextureDecodeInfo replacedInfo;
|
||||
replacedInfo.cachekey = entry.CacheKey();
|
||||
|
@ -255,9 +255,8 @@ void GPUStateCache::DoState(PointerWrap &p) {
|
||||
// needShaderTexClamp and bgraTexture don't need to be saved.
|
||||
|
||||
if (s >= 3) {
|
||||
p.Do(textureSimpleAlpha);
|
||||
} else {
|
||||
textureSimpleAlpha = false;
|
||||
bool oldTextureSimpleAlpha = false;
|
||||
p.Do(oldTextureSimpleAlpha);
|
||||
}
|
||||
|
||||
if (s < 2) {
|
||||
|
@ -513,12 +513,6 @@ struct GPUStateCache {
|
||||
Dirty(DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
}
|
||||
void SetTextureSimpleAlpha(bool simpleAlpha) {
|
||||
if (simpleAlpha != textureSimpleAlpha) {
|
||||
textureSimpleAlpha = simpleAlpha;
|
||||
Dirty(DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
}
|
||||
void SetNeedShaderTexclamp(bool need) {
|
||||
if (need != needShaderTexClamp) {
|
||||
needShaderTexClamp = need;
|
||||
@ -541,7 +535,6 @@ struct GPUStateCache {
|
||||
uint64_t dirty;
|
||||
|
||||
bool textureFullAlpha;
|
||||
bool textureSimpleAlpha;
|
||||
bool vertexFullAlpha;
|
||||
|
||||
int skipDrawReason;
|
||||
|
@ -432,7 +432,6 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr
|
||||
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1);
|
||||
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE);
|
||||
|
||||
framebufferManager_->RebindFramebuffer();
|
||||
draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
|
||||
@ -448,7 +447,6 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr
|
||||
imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
|
||||
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
|
||||
gstate_c.SetTextureSimpleAlpha(gstate_c.textureFullAlpha);
|
||||
}
|
||||
|
||||
SamplerCacheKey samplerKey;
|
||||
@ -695,7 +693,6 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry, bool replaceIm
|
||||
entry->vkTex->texture_->EndCreate(cmdInit);
|
||||
|
||||
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureSimpleAlpha(entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
VkFormat TextureCacheVulkan::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
|
||||
@ -767,6 +764,16 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
||||
DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, false);
|
||||
gpuStats.numTexturesDecoded++;
|
||||
|
||||
// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
// TODO: When we decode directly, this can be more expensive (maybe not on mobile?)
|
||||
// This does allow us to skip alpha testing, though.
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
u32 fmt = dstFmt;
|
||||
scaler.ScaleAlways((u32 *)writePtr, pixelData, fmt, w, h, scaleFactor);
|
||||
@ -787,13 +794,6 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
||||
decPitch = rowPitch;
|
||||
}
|
||||
}
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user