Merge pull request #16533 from hrydgard/texture-decode-overrun-fix

OpenGL: Fix case in tex decoder where we could write off the end of a buffer
This commit is contained in:
Unknown W. Brackets 2022-12-09 16:32:57 -08:00 committed by GitHub
commit 24b62465b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 58 additions and 34 deletions

View File

@ -282,7 +282,7 @@ public:
UI::Event OnChoice;
protected:
bool HasTitleBar() const { return false; }
bool HasTitleBar() const override { return false; }
private:
const ContextMenuItem *items_;

View File

@ -1556,15 +1556,16 @@ static CheckAlphaResult DecodeDXTBlocks(uint8_t *out, int outPitch, uint32_t tex
u32 blockIndex = (y / 4) * (bufw / 4);
int blockHeight = std::min(h - y, 4);
for (int x = 0; x < minw; x += 4) {
int blockWidth = std::min(minw - x, 4);
switch (n) {
case 1:
DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockHeight, &alphaSum);
DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockWidth, blockHeight, &alphaSum);
break;
case 3:
DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockHeight);
DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockWidth, blockHeight);
break;
case 5:
DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockHeight);
DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockWidth, blockHeight);
break;
}
blockIndex++;
@ -1673,7 +1674,9 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
case GE_CMODE_16BIT_ABGR5551:
case GE_CMODE_16BIT_ABGR4444:
{
if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit) {
// The w >= 4 check keeps narrow textures off this path, so that we don't need
// a case that handles a single pixel in DeIndexTexture4Optimal<u16>.
if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit && w >= 4) {
// We don't bother with fullalpha here (clutAlphaLinear_)
// Here, reverseColors means the CLUT is already reversed.
if (reverseColors) {

View File

@ -421,9 +421,9 @@ class DXTDecoder {
public:
inline void DecodeColors(const DXT1Block *src, bool ignore1bitAlpha);
inline void DecodeAlphaDXT5(const DXT5Block *src);
inline void WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int height);
inline void WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int height);
inline void WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int height);
inline void WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int width, int height);
inline void WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int width, int height);
inline void WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int width, int height);
bool AnyNonFullAlpha() const { return anyNonFullAlpha_; }
@ -507,11 +507,11 @@ void DXTDecoder::DecodeAlphaDXT5(const DXT5Block *src) {
}
}
void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int height) {
void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int width, int height) {
bool anyColor3 = false;
for (int y = 0; y < height; y++) {
int colordata = src->lines[y];
for (int x = 0; x < 4; x++) {
for (int x = 0; x < width; x++) {
int col = colordata & 3;
if (col == 3) {
anyColor3 = true;
@ -527,11 +527,11 @@ void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int
}
}
void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int height) {
void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int width, int height) {
for (int y = 0; y < height; y++) {
int colordata = src->color.lines[y];
u32 alphadata = src->alphaLines[y];
for (int x = 0; x < 4; x++) {
for (int x = 0; x < width; x++) {
dst[x] = colors_[colordata & 3] | (alphadata << 28);
colordata >>= 2;
alphadata >>= 4;
@ -540,13 +540,13 @@ void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int
}
}
void DXTDecoder::WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int height) {
void DXTDecoder::WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int width, int height) {
// 48 bits, 3 bit index per pixel, 12 bits per line.
u64 alphadata = ((u64)(u16)src->alphadata1 << 32) | (u32)src->alphadata2;
for (int y = 0; y < height; y++) {
int colordata = src->color.lines[y];
for (int x = 0; x < 4; x++) {
for (int x = 0; x < width; x++) {
dst[x] = colors_[colordata & 3] | (alpha_[alphadata & 7] << 24);
colordata >>= 2;
alphadata >>= 3;
@ -619,24 +619,24 @@ uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y) {
}
// Decodes one DXT1 block into dst.
//
// The block colors are decoded first (DecodeColors is called with its second
// argument false), then WriteColorsDXT1 writes the texels. `width` and `height`
// bound how many columns and rows of the block are written; the visible caller
// (DecodeDXTBlocks) clamps both to at most 4.
// `pitch` is forwarded unchanged to WriteColorsDXT1 - presumably the destination
// row stride in u32 units; confirm against WriteColorsDXT1.
// On return, *alpha is set to 0 if the decoder reported any non-full alpha;
// otherwise it is masked with 1.
//
// NOTE(review): this listing interleaves both sides of the change. The
// signature and call without `width` appear to be the pre-change form, and the
// ones with `width` the post-change form.
//
// This could probably be done faster by decoding two or four blocks at a time with SSE/NEON.
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int height, u32 *alpha) {
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int width, int height, u32 *alpha) {
DXTDecoder dxt;
dxt.DecodeColors(src, false);
dxt.WriteColorsDXT1(dst, src, pitch, height);
dxt.WriteColorsDXT1(dst, src, pitch, width, height);
// Precedence: this is *alpha &= (AnyNonFullAlpha() ? 0 : 1).
*alpha &= dxt.AnyNonFullAlpha() ? 0 : 1;
}
// Decodes one DXT3 block into dst.
//
// Colors come from the embedded color block (src->color), decoded with the
// second DecodeColors argument set to true. WriteColorsDXT3 then writes the
// texels. `width` and `height` bound how many columns and rows of the block
// are written; the visible caller (DecodeDXTBlocks) clamps both to at most 4.
// `pitch` is forwarded unchanged to WriteColorsDXT3 - presumably the
// destination row stride in u32 units; confirm against WriteColorsDXT3.
//
// NOTE(review): this listing interleaves both sides of the change. The
// signature and call without `width` appear to be the pre-change form, and the
// ones with `width` the post-change form.
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int height) {
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int width, int height) {
DXTDecoder dxt;
dxt.DecodeColors(&src->color, true);
dxt.WriteColorsDXT3(dst, src, pitch, height);
dxt.WriteColorsDXT3(dst, src, pitch, width, height);
}
// Decodes one DXT5 block into dst.
//
// Colors come from the embedded color block (src->color), decoded with the
// second DecodeColors argument set to true. DecodeAlphaDXT5 then prepares the
// alpha values before WriteColorsDXT5 writes the texels. `width` and `height`
// bound how many columns and rows of the block are written; the visible caller
// (DecodeDXTBlocks) clamps both to at most 4.
// `pitch` is forwarded unchanged to WriteColorsDXT5 - presumably the
// destination row stride in u32 units; confirm against WriteColorsDXT5.
//
// NOTE(review): this listing interleaves both sides of the change. The
// signature and call without `width` appear to be the pre-change form, and the
// ones with `width` the post-change form.
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int width, int height) {
DXTDecoder dxt;
dxt.DecodeColors(&src->color, true);
dxt.DecodeAlphaDXT5(src);
dxt.WriteColorsDXT5(dst, src, pitch, height);
dxt.WriteColorsDXT5(dst, src, pitch, width, height);
}
#ifdef _M_SSE

View File

@ -65,9 +65,9 @@ struct DXT5Block {
u8 alpha1; u8 alpha2;
};
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int height, u32 *alpha);
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int height);
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height);
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int width, int height, u32 *alpha);
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int width, int height);
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int width, int height);
uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y);
uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y);
@ -163,22 +163,36 @@ inline void DeIndexTexture4(/*WRITEONLY*/ ClutT *dest, const u8 *indexed, int le
ClutT alphaSum = (ClutT)(-1);
if (nakedIndex) {
for (int i = 0; i < length; i += 2) {
while (length >= 2) {
u8 index = *indexed++;
ClutT color0 = clut[index & 0xf];
ClutT color1 = clut[index >> 4];
dest[i + 0] = color0;
dest[i + 1] = color1;
*dest++ = color0;
*dest++ = color1;
alphaSum &= color0 & color1;
length -= 2;
}
if (length) { // Last pixel. Can really only happen in 1xY textures, but making this work generically.
u8 index = *indexed++;
ClutT color0 = clut[index & 0xf];
*dest = color0;
alphaSum &= color0;
}
} else {
for (int i = 0; i < length; i += 2) {
while (length >= 2) {
u8 index = *indexed++;
ClutT color0 = clut[gstate.transformClutIndex((index >> 0) & 0xf)];
ClutT color1 = clut[gstate.transformClutIndex((index >> 4) & 0xf)];
dest[i + 0] = color0;
dest[i + 1] = color1;
*dest++ = color0;
*dest++ = color1;
alphaSum &= color0 & color1;
length -= 2;
}
if (length) {
u8 index = *indexed++;
ClutT color0 = clut[gstate.transformClutIndex((index >> 0) & 0xf)];
*dest = color0;
alphaSum &= color0;
}
}
@ -187,10 +201,15 @@ inline void DeIndexTexture4(/*WRITEONLY*/ ClutT *dest, const u8 *indexed, int le
// Expands packed 4-bit indices into ClutT values without a CLUT lookup.
//
// Each byte of `indexed` holds two indices, low nibble first. Every output
// entry is `color` OR'd with the 4-bit index. `length` is the number of output
// entries to produce.
//
// NOTE(review): this listing interleaves both sides of the change. The
// `for (int i ...)` loop with the `dest[i + N]` stores appears to be the
// pre-change form: it always stores two entries per source byte, so an odd
// `length` writes one entry past `dest[length - 1]`. The `while (length >= 2)`
// loop plus the trailing `if (length)` appears to be the post-change form,
// which writes exactly `length` entries.
template <typename ClutT>
inline void DeIndexTexture4Optimal(ClutT *dest, const u8 *indexed, int length, ClutT color) {
for (int i = 0; i < length; i += 2) {
while (length >= 2) {
u8 index = *indexed++;
dest[i + 0] = color | ((index >> 0) & 0xf);
dest[i + 1] = color | ((index >> 4) & 0xf);
*dest++ = color | ((index >> 0) & 0xf);
*dest++ = color | ((index >> 4) & 0xf);
length -= 2;
}
// Odd length: one entry remains; only the low nibble of the next byte is used.
if (length) {
u8 index = *indexed++;
*dest++ = color | ((index >> 0) & 0xf);
}
}

View File

@ -898,7 +898,8 @@ inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
vecOut[1] = vectorGetByIndex<1>(sum);
vecOut[2] = vectorGetByIndex<2>(sum);
#elif PPSSPP_ARCH(ARM64_NEON)
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(v), m);
float vecIn[4] = {v[0], v[1], v[2], 1.0f};
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(vecIn), m);
vecOut[0] = vgetq_lane_f32(sum, 0);
vecOut[1] = vgetq_lane_f32(sum, 1);
vecOut[2] = vgetq_lane_f32(sum, 2);
@ -957,7 +958,8 @@ inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
__m128 sum = Vec3ByMatrix44Internal(x, y, z, m);
_mm_storeu_ps(vecOut, sum);
#elif PPSSPP_ARCH(ARM64_NEON)
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(v), m);
float vecIn[4] = {v[0], v[1], v[2], 1.0f};
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(vecIn), m);
vst1q_f32(vecOut, sum);
#else
vecOut[0] = v[0] * m[0] + v[1] * m[4] + v[2] * m[8] + m[12];