Merge pull request #4387 from hrydgard/unpack_subimage

Use GL_EXT_unpack_subimage to speed up non-power-of-two texture loads when available
This commit is contained in:
Henrik Rydgård 2013-11-01 12:02:50 -07:00
commit 1347c3b019
2 changed files with 17 additions and 9 deletions

View File

@ -1206,12 +1206,14 @@ GLenum TextureCache::GetDestFormat(GETextureFormat format, GEPaletteFormat clutF
}
}
void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, GLenum dstFmt) {
void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, GLenum dstFmt, int *bufwout) {
void *finalBuf = NULL;
u32 texaddr = gstate.getTextureAddress(level);
int bufw = GetTextureBufw(level, texaddr, format);
if (bufwout)
*bufwout = bufw;
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
const u8 *texptr = Memory::GetPointer(texaddr);
@ -1311,7 +1313,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
case GE_TFMT_8888:
if (!gstate.isTextureSwizzled()) {
// Special case: if we don't need to deal with packing, we don't need to copy.
if (w == bufw) {
if (gl_extensions.EXT_unpack_subimage || w == bufw) {
finalBuf = Memory::GetPointer(texaddr);
} else {
int len = bufw * h;
@ -1325,7 +1327,6 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
tmpTexBuf32.resize(std::max(bufw, w) * h);
finalBuf = UnswizzleFromMem(texaddr, bufw, 4, level);
}
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
break;
case GE_TFMT_DXT1:
@ -1397,7 +1398,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
ERROR_LOG_REPORT(G3D, "NO finalbuf! Will crash!");
}
if (w != bufw) {
if (!gl_extensions.EXT_unpack_subimage && w != bufw) {
int pixelSize;
switch (dstFmt) {
case GL_UNSIGNED_SHORT_4_4_4_4:
@ -1493,7 +1494,8 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
// TODO: Look into using BGRA for 32-bit textures when the GL_EXT_texture_format_BGRA8888 extension is available, as it's faster than RGBA on some chips.
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt);
int bufw;
void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt, &bufw);
if (finalBuf == NULL) {
return;
}
@ -1504,11 +1506,13 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
gpuStats.numTexturesDecoded++;
// Can restore these and remove the fixup at the end of DecodeTextureLevel on desktop GL and GLES 3.
// glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw);
// glPixelStorei(GL_PACK_ROW_LENGTH, bufw);
bool useUnpack = false;
if (gl_extensions.EXT_unpack_subimage && w != bufw) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw);
useUnpack = true;
}
glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign);
glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign);
int scaleFactor;
// Auto-texture scale up to 5x rendering resolution
@ -1548,6 +1552,10 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
glTexImage2D(GL_TEXTURE_2D, level, components, w, h, 0, components, dstFmt, pixelData);
}
}
if (useUnpack) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
}
// Only used by Qt UI?

View File

@ -118,7 +118,7 @@ private:
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, GLenum dstFmt);
GLenum GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, GLenum dstFmt);
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, GLenum dstFmt, int *bufw = 0);
void CheckAlpha(TexCacheEntry &entry, u32 *pixelData, GLenum dstFmt, int w, int h);
template <typename T>
const T *GetCurrentClut();