mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
Merge pull request #16533 from hrydgard/texture-decode-overrun-fix
OpenGL: Fix case in tex decoder where we could write off the end of a buffer
This commit is contained in:
commit
24b62465b7
@ -282,7 +282,7 @@ public:
|
||||
UI::Event OnChoice;
|
||||
|
||||
protected:
|
||||
bool HasTitleBar() const { return false; }
|
||||
bool HasTitleBar() const override { return false; }
|
||||
|
||||
private:
|
||||
const ContextMenuItem *items_;
|
||||
|
@ -1556,15 +1556,16 @@ static CheckAlphaResult DecodeDXTBlocks(uint8_t *out, int outPitch, uint32_t tex
|
||||
u32 blockIndex = (y / 4) * (bufw / 4);
|
||||
int blockHeight = std::min(h - y, 4);
|
||||
for (int x = 0; x < minw; x += 4) {
|
||||
int blockWidth = std::min(minw - x, 4);
|
||||
switch (n) {
|
||||
case 1:
|
||||
DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockHeight, &alphaSum);
|
||||
DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockWidth, blockHeight, &alphaSum);
|
||||
break;
|
||||
case 3:
|
||||
DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockHeight);
|
||||
DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockWidth, blockHeight);
|
||||
break;
|
||||
case 5:
|
||||
DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockHeight);
|
||||
DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockWidth, blockHeight);
|
||||
break;
|
||||
}
|
||||
blockIndex++;
|
||||
@ -1673,7 +1674,9 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
|
||||
case GE_CMODE_16BIT_ABGR5551:
|
||||
case GE_CMODE_16BIT_ABGR4444:
|
||||
{
|
||||
if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit) {
|
||||
// The w > 1 check is to not need a case that handles a single pixel
|
||||
// in DeIndexTexture4Optimal<u16>.
|
||||
if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit && w >= 4) {
|
||||
// We don't bother with fullalpha here (clutAlphaLinear_)
|
||||
// Here, reverseColors means the CLUT is already reversed.
|
||||
if (reverseColors) {
|
||||
|
@ -421,9 +421,9 @@ class DXTDecoder {
|
||||
public:
|
||||
inline void DecodeColors(const DXT1Block *src, bool ignore1bitAlpha);
|
||||
inline void DecodeAlphaDXT5(const DXT5Block *src);
|
||||
inline void WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int height);
|
||||
inline void WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int height);
|
||||
inline void WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int height);
|
||||
inline void WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int width, int height);
|
||||
inline void WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int width, int height);
|
||||
inline void WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int width, int height);
|
||||
|
||||
bool AnyNonFullAlpha() const { return anyNonFullAlpha_; }
|
||||
|
||||
@ -507,11 +507,11 @@ void DXTDecoder::DecodeAlphaDXT5(const DXT5Block *src) {
|
||||
}
|
||||
}
|
||||
|
||||
void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int height) {
|
||||
void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int width, int height) {
|
||||
bool anyColor3 = false;
|
||||
for (int y = 0; y < height; y++) {
|
||||
int colordata = src->lines[y];
|
||||
for (int x = 0; x < 4; x++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int col = colordata & 3;
|
||||
if (col == 3) {
|
||||
anyColor3 = true;
|
||||
@ -527,11 +527,11 @@ void DXTDecoder::WriteColorsDXT1(u32 *dst, const DXT1Block *src, int pitch, int
|
||||
}
|
||||
}
|
||||
|
||||
void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int height) {
|
||||
void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int width, int height) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
int colordata = src->color.lines[y];
|
||||
u32 alphadata = src->alphaLines[y];
|
||||
for (int x = 0; x < 4; x++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
dst[x] = colors_[colordata & 3] | (alphadata << 28);
|
||||
colordata >>= 2;
|
||||
alphadata >>= 4;
|
||||
@ -540,13 +540,13 @@ void DXTDecoder::WriteColorsDXT3(u32 *dst, const DXT3Block *src, int pitch, int
|
||||
}
|
||||
}
|
||||
|
||||
void DXTDecoder::WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int height) {
|
||||
void DXTDecoder::WriteColorsDXT5(u32 *dst, const DXT5Block *src, int pitch, int width, int height) {
|
||||
// 48 bits, 3 bit index per pixel, 12 bits per line.
|
||||
u64 alphadata = ((u64)(u16)src->alphadata1 << 32) | (u32)src->alphadata2;
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
int colordata = src->color.lines[y];
|
||||
for (int x = 0; x < 4; x++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
dst[x] = colors_[colordata & 3] | (alpha_[alphadata & 7] << 24);
|
||||
colordata >>= 2;
|
||||
alphadata >>= 3;
|
||||
@ -619,24 +619,24 @@ uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y) {
|
||||
}
|
||||
|
||||
// This could probably be done faster by decoding two or four blocks at a time with SSE/NEON.
|
||||
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int height, u32 *alpha) {
|
||||
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int width, int height, u32 *alpha) {
|
||||
DXTDecoder dxt;
|
||||
dxt.DecodeColors(src, false);
|
||||
dxt.WriteColorsDXT1(dst, src, pitch, height);
|
||||
dxt.WriteColorsDXT1(dst, src, pitch, width, height);
|
||||
*alpha &= dxt.AnyNonFullAlpha() ? 0 : 1;
|
||||
}
|
||||
|
||||
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int height) {
|
||||
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int width, int height) {
|
||||
DXTDecoder dxt;
|
||||
dxt.DecodeColors(&src->color, true);
|
||||
dxt.WriteColorsDXT3(dst, src, pitch, height);
|
||||
dxt.WriteColorsDXT3(dst, src, pitch, width, height);
|
||||
}
|
||||
|
||||
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
|
||||
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int width, int height) {
|
||||
DXTDecoder dxt;
|
||||
dxt.DecodeColors(&src->color, true);
|
||||
dxt.DecodeAlphaDXT5(src);
|
||||
dxt.WriteColorsDXT5(dst, src, pitch, height);
|
||||
dxt.WriteColorsDXT5(dst, src, pitch, width, height);
|
||||
}
|
||||
|
||||
#ifdef _M_SSE
|
||||
|
@ -65,9 +65,9 @@ struct DXT5Block {
|
||||
u8 alpha1; u8 alpha2;
|
||||
};
|
||||
|
||||
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int height, u32 *alpha);
|
||||
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int height);
|
||||
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height);
|
||||
void DecodeDXT1Block(u32 *dst, const DXT1Block *src, int pitch, int width, int height, u32 *alpha);
|
||||
void DecodeDXT3Block(u32 *dst, const DXT3Block *src, int pitch, int width, int height);
|
||||
void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int width, int height);
|
||||
|
||||
uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y);
|
||||
uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y);
|
||||
@ -163,22 +163,36 @@ inline void DeIndexTexture4(/*WRITEONLY*/ ClutT *dest, const u8 *indexed, int le
|
||||
|
||||
ClutT alphaSum = (ClutT)(-1);
|
||||
if (nakedIndex) {
|
||||
for (int i = 0; i < length; i += 2) {
|
||||
while (length >= 2) {
|
||||
u8 index = *indexed++;
|
||||
ClutT color0 = clut[index & 0xf];
|
||||
ClutT color1 = clut[index >> 4];
|
||||
dest[i + 0] = color0;
|
||||
dest[i + 1] = color1;
|
||||
*dest++ = color0;
|
||||
*dest++ = color1;
|
||||
alphaSum &= color0 & color1;
|
||||
length -= 2;
|
||||
}
|
||||
if (length) { // Last pixel. Can really only happen in 1xY textures, but making this work generically.
|
||||
u8 index = *indexed++;
|
||||
ClutT color0 = clut[index & 0xf];
|
||||
*dest = color0;
|
||||
alphaSum &= color0;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < length; i += 2) {
|
||||
while (length >= 2) {
|
||||
u8 index = *indexed++;
|
||||
ClutT color0 = clut[gstate.transformClutIndex((index >> 0) & 0xf)];
|
||||
ClutT color1 = clut[gstate.transformClutIndex((index >> 4) & 0xf)];
|
||||
dest[i + 0] = color0;
|
||||
dest[i + 1] = color1;
|
||||
*dest++ = color0;
|
||||
*dest++ = color1;
|
||||
alphaSum &= color0 & color1;
|
||||
length -= 2;
|
||||
}
|
||||
if (length) {
|
||||
u8 index = *indexed++;
|
||||
ClutT color0 = clut[gstate.transformClutIndex((index >> 0) & 0xf)];
|
||||
*dest = color0;
|
||||
alphaSum &= color0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,10 +201,15 @@ inline void DeIndexTexture4(/*WRITEONLY*/ ClutT *dest, const u8 *indexed, int le
|
||||
|
||||
template <typename ClutT>
|
||||
inline void DeIndexTexture4Optimal(ClutT *dest, const u8 *indexed, int length, ClutT color) {
|
||||
for (int i = 0; i < length; i += 2) {
|
||||
while (length >= 2) {
|
||||
u8 index = *indexed++;
|
||||
dest[i + 0] = color | ((index >> 0) & 0xf);
|
||||
dest[i + 1] = color | ((index >> 4) & 0xf);
|
||||
*dest++ = color | ((index >> 0) & 0xf);
|
||||
*dest++ = color | ((index >> 4) & 0xf);
|
||||
length -= 2;
|
||||
}
|
||||
if (length) {
|
||||
u8 index = *indexed++;
|
||||
*dest++ = color | ((index >> 0) & 0xf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -898,7 +898,8 @@ inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
|
||||
vecOut[1] = vectorGetByIndex<1>(sum);
|
||||
vecOut[2] = vectorGetByIndex<2>(sum);
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(v), m);
|
||||
float vecIn[4] = {v[0], v[1], v[2], 1.0f};
|
||||
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(vecIn), m);
|
||||
vecOut[0] = vgetq_lane_f32(sum, 0);
|
||||
vecOut[1] = vgetq_lane_f32(sum, 1);
|
||||
vecOut[2] = vgetq_lane_f32(sum, 2);
|
||||
@ -957,7 +958,8 @@ inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
|
||||
__m128 sum = Vec3ByMatrix44Internal(x, y, z, m);
|
||||
_mm_storeu_ps(vecOut, sum);
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(v), m);
|
||||
float vecIn[4] = {v[0], v[1], v[2], 1.0f};
|
||||
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(vecIn), m);
|
||||
vst1q_f32(vecOut, sum);
|
||||
#else
|
||||
vecOut[0] = v[0] * m[0] + v[1] * m[4] + v[2] * m[8] + m[12];
|
||||
|
Loading…
Reference in New Issue
Block a user