Merge pull request #1787 from unknownbrackets/gpu-minor

Misc. GE changes and CLUT fix
This commit is contained in:
Henrik Rydgård 2013-05-12 12:14:24 -07:00
commit 3d56770eac
5 changed files with 101 additions and 48 deletions

View File

@ -583,7 +583,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_LOADCLUT:
gstate_c.textureChanged = true;
textureCache_.UpdateCurrentClut();
textureCache_.LoadClut();
// This could be used to "dirty" textures with clut.
break;
@ -938,6 +938,35 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
}
break;
#ifndef USING_GLES2
case GE_CMD_LOGICOPENABLE:
if (data != 0)
ERROR_LOG_REPORT_ONCE(logicOpEnable, G3D, "Unsupported logic op enabled: %x", data);
break;
case GE_CMD_LOGICOP:
if (data != 0)
ERROR_LOG_REPORT_ONCE(logicOp, G3D, "Unsupported logic op: %06x", data);
break;
case GE_CMD_ANTIALIASENABLE:
if (data != 0)
WARN_LOG_REPORT_ONCE(antiAlias, G3D, "Unsupported antialias enabled: %06x", data);
break;
case GE_CMD_TEXLODSLOPE:
if (data != 0)
WARN_LOG_REPORT_ONCE(texLodSlope, G3D, "Unsupported texture lod slope: %06x", data);
break;
case GE_CMD_TEXLEVEL:
if (data == 1)
WARN_LOG_REPORT_ONCE(texLevel1, G3D, "Unsupported texture level bias settings: %06x", data)
else if (data != 0)
WARN_LOG_REPORT_ONCE(texLevel2, G3D, "Unsupported texture level bias settings: %06x", data);
break;
#endif
default:
GPUCommon::ExecuteOp(op, diff);
break;

View File

@ -73,6 +73,10 @@ static bool IsColorTestTriviallyTrue() {
}
static bool CanDoubleSrcBlendMode() {
if (!gstate.isAlphaBlendEnabled()) {
return false;
}
int funcA = gstate.getBlendFuncA();
int funcB = gstate.getBlendFuncB();
if (funcA != GE_SRCBLEND_DOUBLESRCALPHA) {

View File

@ -57,7 +57,7 @@ static inline u32 GetLevelBufw(int level, u32 texaddr) {
return gstate.texbufwidth[level] & 0x7FF;
}
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false) {
TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutDirty_(false) {
lastBoundTexture = -1;
// This is 5MB of temporary storage. Might be possible to shrink it.
tmpTexBuf32.resize(1024 * 512); // 2MB
@ -184,7 +184,7 @@ void TextureCache::NotifyFramebufferDestroyed(u32 address, VirtualFramebuffer *f
}
}
// Assembles the CLUT source address from the GE state: the low 24 bits are taken
// from gstate.clutaddr, and bits 24-27 from gstate.clutaddrupper shifted left by 8.
// NOTE(review): two signatures appear here; the clutEntrySize parameter of the
// first one is not used by the body.
static u32 GetClutAddr(u32 clutEntrySize) {
static u32 GetClutAddr() {
return ((gstate.clutaddr & 0xFFFFFF) | ((gstate.clutaddrupper << 8) & 0x0F000000));
}
@ -773,19 +773,44 @@ inline bool TextureCache::TexCacheEntry::MatchesClut(bool hasClut, u8 clutformat
return clutformat == clutformat2;
}
void TextureCache::UpdateCurrentClut() {
GEPaletteFormat clutFormat = (GEPaletteFormat)(gstate.clutformat & 3);
const u32 clutColorBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2;
u32 clutAddr = GetClutAddr(clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2);
// Copies the CLUT from emulated memory into clutBuf_ and records a hash of it.
// The number of bytes loaded is (gstate.loadclut & 0x3f) * 32.
// Always sets clutDirty_, which SetTexture() checks to run UpdateCurrentClut()
// before a clut texture is used.
void TextureCache::LoadClut() {
u32 clutAddr = GetClutAddr();
u32 clutTotalBytes = (gstate.loadclut & 0x3f) * 32;
if (Memory::IsValidAddress(clutAddr)) {
Memory::Memcpy((u8 *)clutBuf_, clutAddr, clutTotalBytes);
// NOTE(review): clutFormat and clutColorBytes are not declared in LoadClut() as
// shown here, and the same conversion is performed in UpdateCurrentClut().
// Presumably this line is a leftover from the previous version - confirm.
convertColors((u8 *)clutBuf_, getClutDestFormat(clutFormat), clutTotalBytes / clutColorBytes);
// Hash covers clutTotalBytes bytes of clutBuf_.
clutHash_ = CityHash32((const char *)clutBuf_, clutTotalBytes);
} else {
// Source address is not valid: fill the buffer with 0xFF bytes and use a zero hash.
memset(clutBuf_, 0xFF, clutTotalBytes);
clutHash_ = 0;
}
clutDirty_ = true;
}
void TextureCache::UpdateCurrentClut() {
GEPaletteFormat clutFormat = (GEPaletteFormat)(gstate.clutformat & 3);
const u32 clutColorBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2;
u32 clutTotalBytes = (gstate.loadclut & 0x3f) * 32;
convertColors((u8 *)clutBuf_, getClutDestFormat(clutFormat), clutTotalBytes / clutColorBytes);
// Special optimization: fonts typically draw clut4 with just alpha values in a single color.
clutAlphaLinear_ = false;
clutAlphaLinearColor_ = 0;
if (gstate.clutformat == (0xC500FF00 | GE_CMODE_16BIT_ABGR4444)) {
const u16 *clut = GetCurrentClut<u16>();
clutAlphaLinear_ = true;
clutAlphaLinearColor_ = clut[15] & 0xFFF0;
for (int i = 0; i < 16; ++i) {
if ((clut[i] & 0xf) != i) {
clutAlphaLinear_ = false;
break;
}
// Alpha 0 doesn't matter.
if (i != 0 && (clut[i] & 0xFFF0) != clutAlphaLinearColor_) {
clutAlphaLinear_ = false;
break;
}
}
}
}
template <typename T>
@ -817,6 +842,11 @@ void TextureCache::SetTexture() {
u32 clutformat, cluthash;
if (hasClut) {
if (clutDirty_) {
// We update here because the clut format can be specified after the load.
UpdateCurrentClut();
clutDirty_ = false;
}
clutformat = gstate.clutformat & 3;
cluthash = GetCurrentClutHash();
cachekey |= (u64)cluthash << 32;
@ -1069,8 +1099,12 @@ void *TextureCache::DecodeTextureLevel(u8 format, u8 clutformat, int level, u32
switch (format)
{
case GE_TFMT_CLUT4:
{
dstFmt = getClutDestFormat((GEPaletteFormat)(clutformat));
const bool mipmapShareClut = (gstate.texmode & 0x100) == 0;
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
switch (clutformat) {
case GE_CMODE_16BIT_BGR5650:
case GE_CMODE_16BIT_ABGR5551:
@ -1078,43 +1112,21 @@ void *TextureCache::DecodeTextureLevel(u8 format, u8 clutformat, int level, u32
{
tmpTexBuf16.resize(std::max(bufw, w) * h);
tmpTexBufRearrange.resize(std::max(bufw, w) * h);
const u16 *clut = GetCurrentClut<u16>();
u32 clutSharingOffset = 0; //(gstate.mipmapShareClut & 1) ? 0 : level * 16;
const u16 *clut = GetCurrentClut<u16>() + clutSharingOffset;
texByteAlign = 2;
// Special optimization: fonts typically draw clut4 with just alpha values in a single color.
bool linearClut = false;
u16 linearColor = 0;
if (gstate.clutformat == (0xC500FF00 | GE_CMODE_16BIT_ABGR4444)) {
// TODO: Do this check once per CLUT load?
linearClut = true;
linearColor = clut[clutSharingOffset + 15] & 0xFFF0;
for (int i = 0; i < 16; ++i) {
if ((clut[clutSharingOffset + i] & 0xf) != i) {
linearClut = false;
break;
}
// Alpha 0 doesn't matter.
if (i != 0 && (clut[clutSharingOffset + i] & 0xFFF0) != linearColor) {
linearClut = false;
break;
}
}
}
if (!(gstate.texmode & 1)) {
if (linearClut) {
DeIndexTexture4Optimal(tmpTexBuf16.data(), texaddr, bufw * h, linearColor);
if (clutAlphaLinear_ && mipmapShareClut) {
DeIndexTexture4Optimal(tmpTexBuf16.data(), texaddr, bufw * h, clutAlphaLinearColor_);
} else {
DeIndexTexture4(tmpTexBuf16.data(), texaddr, bufw * h, clut + clutSharingOffset);
DeIndexTexture4(tmpTexBuf16.data(), texaddr, bufw * h, clut);
}
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
UnswizzleFromMem(texaddr, bufw, 0, level);
if (linearClut) {
DeIndexTexture4Optimal(tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, linearColor);
if (clutAlphaLinear_ && mipmapShareClut) {
DeIndexTexture4Optimal(tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clutAlphaLinearColor_);
} else {
DeIndexTexture4(tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut + clutSharingOffset);
DeIndexTexture4(tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
}
}
finalBuf = tmpTexBuf16.data();
@ -1125,16 +1137,15 @@ void *TextureCache::DecodeTextureLevel(u8 format, u8 clutformat, int level, u32
{
tmpTexBuf32.resize(std::max(bufw, w) * h);
tmpTexBufRearrange.resize(std::max(bufw, w) * h);
const u32 *clut = GetCurrentClut<u32>();
u32 clutSharingOffset = 0;//gstate.mipmapShareClut ? 0 : level * 16;
const u32 *clut = GetCurrentClut<u32>() + clutSharingOffset;
if (!(gstate.texmode & 1)) {
DeIndexTexture4(tmpTexBuf32.data(), texaddr, bufw * h, clut + clutSharingOffset);
DeIndexTexture4(tmpTexBuf32.data(), texaddr, bufw * h, clut);
finalBuf = tmpTexBuf32.data();
} else {
UnswizzleFromMem(texaddr, bufw, 0, level);
// Let's reuse tmpTexBuf16, just need double the space.
tmpTexBuf16.resize(std::max(bufw, w) * h * 2);
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut + clutSharingOffset);
DeIndexTexture4((u32 *)tmpTexBuf16.data(), (u8 *)tmpTexBuf32.data(), bufw * h, clut);
finalBuf = tmpTexBuf16.data();
}
}
@ -1144,6 +1155,7 @@ void *TextureCache::DecodeTextureLevel(u8 format, u8 clutformat, int level, u32
ERROR_LOG(G3D, "Unknown CLUT4 texture mode %d", (gstate.clutformat & 3));
return NULL;
}
}
break;
case GE_TFMT_CLUT8:

View File

@ -38,7 +38,7 @@ public:
void Invalidate(u32 addr, int size, GPUInvalidationType type);
void InvalidateAll(GPUInvalidationType type);
void ClearNextFrame();
void UpdateCurrentClut();
void LoadClut();
// FramebufferManager keeps TextureCache updated about what regions of memory
// are being rendered to. This is barebones so far.
@ -111,6 +111,7 @@ private:
template <typename T>
const T *GetCurrentClut();
u32 GetCurrentClutHash();
void UpdateCurrentClut();
TexCacheEntry *GetEntryAt(u32 texaddr);
@ -127,8 +128,12 @@ private:
SimpleBuf<u32> tmpTexBufRearrange;
bool clutDirty_;
u32 *clutBuf_;
u32 clutHash_;
// True if the clut is just alpha values in the same order (16-bit ABGR4444 cluts only.)
bool clutAlphaLinear_;
u16 clutAlphaLinearColor_;
u32 lastBoundTexture;
float maxAnisotropyLevel;

View File

@ -473,8 +473,8 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
case GE_CMD_TRANSFERSRCPOS:
{
u32 x = (data & 1023)+1;
u32 y = ((data>>10) & 1023)+1;
u32 x = (data & 1023);
u32 y = ((data>>10) & 1023);
if (data & 0xF00000)
sprintf(buffer, "Block transfer src rect TL: %i, %i (extra %x)", x, y, data >> 20);
else
@ -484,8 +484,8 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
case GE_CMD_TRANSFERDSTPOS:
{
u32 x = (data & 1023)+1;
u32 y = ((data>>10) & 1023)+1;
u32 x = (data & 1023);
u32 y = ((data>>10) & 1023);
if (data & 0xF00000)
sprintf(buffer, "Block transfer dest rect TL: %i, %i (extra %x)", x, y, data >> 20);
else
@ -849,7 +849,7 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
break;
case GE_CMD_TEXMODE:
sprintf(buffer, "TexMode %06x (%s)", data, data & 1 ? "swizzle" : "no swizzle");
sprintf(buffer, "TexMode %06x (%s, %d levels, %s)", data, data & 1 ? "swizzle" : "no swizzle", (data >> 16) & 7, (data >> 8) & 1 ? "separate cluts" : "shared clut");
break;
case GE_CMD_TEXFORMAT:
@ -929,7 +929,10 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
break;
case GE_CMD_STENCILOP:
sprintf(buffer, "Stencil op: %06x", data);
{
const char *stencilOps[] = { "KEEP", "ZERO", "REPLACE", "INVERT", "INCREMENT", "DECREMENT", "unsupported1", "unsupported2" };
sprintf(buffer, "Stencil op: fail=%s, pass/depthfail=%s, pass=%s", stencilOps[data & 7], stencilOps[(data >> 8) & 7], stencilOps[(data >> 16) & 7]);
}
break;
case GE_CMD_STENCILTEST: