Update bufw handling in all gpus.

This fixes the softgpu as well, at least.
This commit is contained in:
Unknown W. Brackets 2013-09-15 21:27:13 -07:00
parent 8ca31f7652
commit f43997a47f
10 changed files with 124 additions and 108 deletions

View File

@ -1002,6 +1002,8 @@ add_library(GPU OBJECT
GPU/Common/VertexDecoderCommon.h
GPU/Common/IndexGenerator.cpp
GPU/Common/IndexGenerator.h
GPU/Common/TextureDecoder.cpp
GPU/Common/TextureDecoder.h
GPU/GLES/GLES_GPU.cpp
GPU/GLES/GLES_GPU.h
GPU/GLES/FragmentShaderGenerator.cpp

View File

@ -0,0 +1,20 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "GPU/Common/TextureDecoder.h"
// TODO: Move some common things into here.

View File

@ -0,0 +1,74 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Core/MemMap.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
// Storage size of one texel, in bits, for each texture format. Indexed by the
// format value; the trailing slots are invalid formats and hold 0.
static const u8 textureBitsPerPixel[16] = {
	// Direct color formats.
	16,  // GE_TFMT_5650
	16,  // GE_TFMT_5551
	16,  // GE_TFMT_4444
	32,  // GE_TFMT_8888

	// Palette-indexed formats.
	4,   // GE_TFMT_CLUT4
	8,   // GE_TFMT_CLUT8
	16,  // GE_TFMT_CLUT16
	32,  // GE_TFMT_CLUT32

	// Block-compressed formats.
	4,   // GE_TFMT_DXT1
	8,   // GE_TFMT_DXT3
	8,   // GE_TFMT_DXT5

	// INVALID (remaining five slots).
	0, 0, 0, 0, 0,
};
// Per-format masks applied to bufw. Clearing the low bits rounds bufw down to a
// multiple of 16 bytes' worth of texels (128 / bits per texel), and the 0x7FF
// part wraps it at 2048. Invalid formats mask everything off.
static const u32 textureAlignMask16[16] = {
	0x7F8,  // GE_TFMT_5650:   0x7FF & ~(128 / 16 - 1)
	0x7F8,  // GE_TFMT_5551:   0x7FF & ~(128 / 16 - 1)
	0x7F8,  // GE_TFMT_4444:   0x7FF & ~(128 / 16 - 1)
	0x7FC,  // GE_TFMT_8888:   0x7FF & ~(128 / 32 - 1)
	0x7E0,  // GE_TFMT_CLUT4:  0x7FF & ~(128 / 4 - 1)
	0x7F0,  // GE_TFMT_CLUT8:  0x7FF & ~(128 / 8 - 1)
	0x7F8,  // GE_TFMT_CLUT16: 0x7FF & ~(128 / 16 - 1)
	0x7FC,  // GE_TFMT_CLUT32: 0x7FF & ~(128 / 32 - 1)
	0x7E0,  // GE_TFMT_DXT1:   0x7FF & ~(128 / 4 - 1)
	0x7F0,  // GE_TFMT_DXT3:   0x7FF & ~(128 / 8 - 1)
	0x7F0,  // GE_TFMT_DXT5:   0x7FF & ~(128 / 8 - 1)

	// INVALID (remaining five slots).
	0, 0, 0, 0, 0,
};
// Returns the buffer width (in texels) to use when reading a texture level.
//
// level:   index into gstate.texbufwidth.
// texaddr: address of the texture data. Addresses below PSP_GetUserMemoryBase()
//          bypass the per-format alignment and use a wider 0x1FFF mask.
// format:  texture format, used to index textureAlignMask16 and
//          textureBitsPerPixel (both have 16 entries).
//
// For invalid formats (mask and bits-per-pixel both 0 in the tables) the
// result is 0.
static inline u32 GetTextureBufw(int level, u32 texaddr, GETextureFormat format) {
	// This is a hack to allow for us to draw the huge PPGe texture, which is always in kernel ram.
	if (texaddr < PSP_GetUserMemoryBase())
		return gstate.texbufwidth[level] & 0x1FFF;

	u32 bufw = gstate.texbufwidth[level] & textureAlignMask16[format];
	const u32 bitsPerPixel = textureBitsPerPixel[format];
	// Invalid formats have 0 bits per pixel; skip the fallback for them so we
	// never divide by zero.
	if (bufw == 0 && bitsPerPixel != 0) {
		// If it's less than 16 bytes, use 16 bytes.
		bufw = (8 * 16) / bitsPerPixel;
	}
	return bufw;
}

View File

@ -24,6 +24,7 @@
#include "GPU/GPUState.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/FramebufferDX9.h"
#include "GPU/Common/TextureDecoder.h"
#include "Core/Config.h"
#include "ext/xxhash.h"
@ -44,12 +45,6 @@ namespace DX9 {
#define TEXCACHE_DECIMATION_INTERVAL 13
extern int g_iNumVideos;
static inline u32 GetLevelBufw(int level, u32 texaddr) {
// Special rules for kernel textures (PPGe):
if (texaddr < PSP_GetUserMemoryBase())
return gstate.texbufwidth[level] & 0x1FFF;
return gstate.texbufwidth[level] & 0x7FF;
}
TextureCacheDX9::TextureCacheDX9() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
lastBoundTexture = INVALID_TEX;
@ -407,8 +402,7 @@ inline void DeIndexTexture4Optimal(ClutT *dest, const u32 texaddr, int length, C
DeIndexTexture4Optimal(dest, indexed, length, color);
}
void *TextureCacheDX9::readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt) {
int bufw = GetLevelBufw(level, texaddr);
void *TextureCacheDX9::ReadIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt, int bufw) {
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
int length = bufw * h;
@ -801,25 +795,6 @@ void TextureCacheDX9::StartFrame() {
}
}
static const u8 bitsPerPixel[16] = {
16, //GE_TFMT_5650,
16, //GE_TFMT_5551,
16, //GE_TFMT_4444,
32, //GE_TFMT_8888,
4, //GE_TFMT_CLUT4,
8, //GE_TFMT_CLUT8,
16, //GE_TFMT_CLUT16,
32, //GE_TFMT_CLUT32,
4, //GE_TFMT_DXT1,
8, //GE_TFMT_DXT3,
8, //GE_TFMT_DXT5,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
};
static inline u32 MiniHash(const u32 *ptr) {
return ptr[0];
}
@ -856,7 +831,7 @@ static inline u32 QuickClutHash(const u8 *clut, u32 bytes) {
}
static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat format) {
const u32 sizeInRAM = (bitsPerPixel[format] * bufw * h) / 8;
const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8;
const u32 *checkp = (const u32 *) Memory::GetPointer(addr);
u32 check = 0;
@ -1083,7 +1058,7 @@ void TextureCacheDX9::SetTexture() {
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
int bufw = GetLevelBufw(0, texaddr);
int bufw = GetTextureBufw(0, texaddr, format);
int maxLevel = ((gstate.texmode >> 16) & 0x7);
u32 texhash = MiniHash((const u32 *)Memory::GetPointer(texaddr));
@ -1236,7 +1211,7 @@ void TextureCacheDX9::SetTexture() {
// This would overestimate the size in many case so we underestimate instead
// to avoid excessive clearing caused by cache invalidations.
entry->sizeInRAM = (bitsPerPixel[format] * bufw * h / 2) / 8;
entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
entry->fullhash = fullhash == 0 ? QuickTexHash(texaddr, bufw, w, h, format) : fullhash;
entry->cluthash = cluthash;
@ -1300,7 +1275,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
int bufw = GetLevelBufw(level, texaddr);
int bufw = GetTextureBufw(level, texaddr, format);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
@ -1310,9 +1285,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
case GE_TFMT_CLUT4:
{
dstFmt = getClutDestFormat(clutformat);
// Don't allow this to be less than 16 bytes (32 * 4 / 8 = 16.)
if (bufw < 32)
bufw = 32;
const bool mipmapShareClut = (gstate.texmode & 0x100) == 0;
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
@ -1371,34 +1343,26 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
break;
case GE_TFMT_CLUT8:
if (bufw < 8)
bufw = 8;
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = readIndexedTex(level, texaddr, 1, dstFmt);
finalBuf = ReadIndexedTex(level, texaddr, 1, dstFmt, bufw);
break;
case GE_TFMT_CLUT16:
if (bufw < 8)
bufw = 8;
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = readIndexedTex(level, texaddr, 2, dstFmt);
finalBuf = ReadIndexedTex(level, texaddr, 2, dstFmt, bufw);
break;
case GE_TFMT_CLUT32:
if (bufw < 4)
bufw = 4;
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = readIndexedTex(level, texaddr, 4, dstFmt);
finalBuf = ReadIndexedTex(level, texaddr, 4, dstFmt, bufw);
break;
case GE_TFMT_4444:
case GE_TFMT_5551:
case GE_TFMT_5650:
if (bufw < 8)
bufw = 8;
if (format == GE_TFMT_4444)
dstFmt = D3DFMT_A4R4G4B4;
else if (format == GE_TFMT_5551)
@ -1422,8 +1386,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
break;
case GE_TFMT_8888:
if (bufw < 4)
bufw = 4;
dstFmt = D3DFMT_A8R8G8B8;
if (!gstate.isTextureSwizzled()) {
// Special case: if we don't need to deal with packing, we don't need to copy.

View File

@ -118,7 +118,7 @@ private:
void Decimate(); // Run this once per frame to get rid of old textures.
void *UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u32 level);
void *readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt);
void *ReadIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt, int bufw);
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages);
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt);

View File

@ -24,6 +24,7 @@
#include "GPU/GPUState.h"
#include "GPU/GLES/TextureCache.h"
#include "GPU/GLES/Framebuffer.h"
#include "GPU/Common/TextureDecoder.h"
#include "Core/Config.h"
#include "ext/xxhash.h"
@ -45,13 +46,6 @@
extern int g_iNumVideos;
static inline u32 GetLevelBufw(int level, u32 texaddr) {
// Special rules for kernel textures (PPGe):
if (texaddr < PSP_GetUserMemoryBase())
return gstate.texbufwidth[level] & 0x1FFF;
return gstate.texbufwidth[level] & 0x7FF;
}
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
lastBoundTexture = -1;
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
@ -776,45 +770,6 @@ void TextureCache::StartFrame() {
}
}
static const u8 bitsPerPixel[16] = {
16, //GE_TFMT_5650,
16, //GE_TFMT_5551,
16, //GE_TFMT_4444,
32, //GE_TFMT_8888,
4, //GE_TFMT_CLUT4,
8, //GE_TFMT_CLUT8,
16, //GE_TFMT_CLUT16,
32, //GE_TFMT_CLUT32,
4, //GE_TFMT_DXT1,
8, //GE_TFMT_DXT3,
8, //GE_TFMT_DXT5,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
};
// Masks to downalign bufw to 16 bytes.
static const u32 alignMask16[16] = {
~(((8 * 16) / 16) - 1), //GE_TFMT_5650,
~(((8 * 16) / 16) - 1), //GE_TFMT_5551,
~(((8 * 16) / 16) - 1), //GE_TFMT_4444,
~(((8 * 16) / 32) - 1), //GE_TFMT_8888,
~(((8 * 16) / 4) - 1), //GE_TFMT_CLUT4,
~(((8 * 16) / 8) - 1), //GE_TFMT_CLUT8,
~(((8 * 16) / 16) - 1), //GE_TFMT_CLUT16,
~(((8 * 16) / 32) - 1), //GE_TFMT_CLUT32,
~(((8 * 16) / 4) - 1), //GE_TFMT_DXT1,
~(((8 * 16) / 8) - 1), //GE_TFMT_DXT3,
~(((8 * 16) / 8) - 1), //GE_TFMT_DXT5,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
0, // INVALID,
};
static inline u32 MiniHash(const u32 *ptr) {
return ptr[0];
}
@ -851,7 +806,7 @@ static inline u32 QuickClutHash(const u8 *clut, u32 bytes) {
}
static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat format) {
const u32 sizeInRAM = (bitsPerPixel[format] * bufw * h) / 8;
const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8;
const u32 *checkp = (const u32 *) Memory::GetPointer(addr);
u32 check = 0;
@ -1055,10 +1010,10 @@ void TextureCache::SetTexture() {
} else {
cluthash = 0;
}
int bufw = GetTextureBufw(0, texaddr, format);
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
int bufw = GetLevelBufw(0, texaddr);
int maxLevel = ((gstate.texmode >> 16) & 0x7);
u32 texhash = MiniHash((const u32 *)Memory::GetPointer(texaddr));
@ -1211,7 +1166,7 @@ void TextureCache::SetTexture() {
// This would overestimate the size in many case so we underestimate instead
// to avoid excessive clearing caused by cache invalidations.
entry->sizeInRAM = (bitsPerPixel[format] * bufw * h / 2) / 8;
entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
entry->fullhash = fullhash == 0 ? QuickTexHash(texaddr, bufw, w, h, format) : fullhash;
entry->cluthash = cluthash;
@ -1308,12 +1263,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
int bufw = GetLevelBufw(level, texaddr) & alignMask16[format];
if (bufw == 0) {
// If it's less than 16 bytes, use 16 bytes.
bufw = (8 * 16) / bitsPerPixel[format];
}
int bufw = GetTextureBufw(level, texaddr, format);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
const u8 *texptr = Memory::GetPointer(texaddr);
@ -1678,8 +1628,7 @@ bool TextureCache::DecodeTexture(u8* output, GPUgstate state)
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
u8 level = 0;
int bufw = GetLevelBufw(level, texaddr);
int bufw = GetTextureBufw(level, texaddr, format);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);

View File

@ -193,6 +193,7 @@
<ClInclude Include="Software\Rasterizer.h" />
<ClInclude Include="Software\SoftGpu.h" />
<ClInclude Include="Software\TransformUnit.h" />
<ClInclude Include="Common\TextureDecoder.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
@ -243,6 +244,7 @@
<ClCompile Include="Software\Rasterizer.cpp" />
<ClCompile Include="Software\SoftGpu.cpp" />
<ClCompile Include="Software\TransformUnit.cpp" />
<ClCompile Include="Common\TextureDecoder.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.vcxproj">

View File

@ -138,6 +138,9 @@
<ClInclude Include="Directx9\helper\global.h">
<Filter>DirectX9\helper</Filter>
</ClInclude>
<ClInclude Include="Common\TextureDecoder.h">
<Filter>Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -254,6 +257,9 @@
<ClCompile Include="Directx9\helper\global.cpp">
<Filter>DirectX9\helper</Filter>
</ClCompile>
<ClCompile Include="Common\TextureDecoder.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View File

@ -19,6 +19,7 @@
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Software/SoftGpu.h"
#include "GPU/Software/Rasterizer.h"
#include "GPU/Software/Colors.h"
@ -171,8 +172,7 @@ static inline u32 SampleNearest(int level, unsigned int u, unsigned int v)
u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...?
// Special rules for kernel textures (PPGe), TODO: Verify!
int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF;
int texbufwidth = GetTextureBufw(level, texaddr, texfmt);
// TODO: Should probably check if textures are aligned properly...

View File

@ -194,6 +194,7 @@ LOCAL_SRC_FILES := \
$(SRC)/GPU/GeDisasm.cpp \
$(SRC)/GPU/Common/IndexGenerator.cpp.arm \
$(SRC)/GPU/Common/VertexDecoderCommon.cpp.arm \
$(SRC)/GPU/Common/TextureDecoder.cpp \
$(SRC)/GPU/GLES/Framebuffer.cpp \
$(SRC)/GPU/GLES/GLES_GPU.cpp.arm \
$(SRC)/GPU/GLES/TextureCache.cpp.arm \