Merge pull request #6864 from unknownbrackets/d3d9

Update Direct3D 9 texture cache
This commit is contained in:
Henrik Rydgård 2014-09-09 10:45:27 +02:00
commit 7d6377295a
13 changed files with 683 additions and 220 deletions

View File

@ -498,6 +498,8 @@ namespace DX9 {
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED);
vfb->last_frame_render = gpuStats.numFlips;
vfb->last_frame_used = 0;
vfb->last_frame_attached = 0;
frameLastFramebufUsed = gpuStats.numFlips;
vfbs_.push_back(vfb);
ClearBuffer();
@ -1104,8 +1106,8 @@ namespace DX9 {
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
if (SUCCEEDED(hr)) {
// TODO: Handle the other formats? We don't currently create them, I think.
buffer.Allocate(locked.Pitch / 4, vfb->renderHeight, GPU_DBG_FORMAT_8888_BGRA, false);
memcpy(buffer.GetData(), locked.pBits, locked.Pitch * vfb->renderHeight);
buffer.Allocate(locked.Pitch / 4, desc.Height, GPU_DBG_FORMAT_8888_BGRA, false);
memcpy(buffer.GetData(), locked.pBits, locked.Pitch * desc.Height);
offscreen->UnlockRect();
success = true;
}

View File

@ -51,6 +51,7 @@ enum {
struct VirtualFramebufferDX9 {
int last_frame_used;
int last_frame_attached;
int last_frame_render;
bool memoryUpdated;

View File

@ -388,6 +388,8 @@ DIRECTX9_GPU::DIRECTX9_GPU()
transformDraw_.SetFramebufferManager(&framebufferManager_);
framebufferManager_.SetTextureCache(&textureCache_);
framebufferManager_.SetShaderManager(shaderManager_);
textureCache_.SetFramebufferManager(&framebufferManager_);
textureCache_.SetShaderManager(shaderManager_);
// Sanity check gstate
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
@ -1582,6 +1584,7 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
LPDIRECT3DBASETEXTURE9 baseTex;
LPDIRECT3DTEXTURE9 tex;
LPDIRECT3DSURFACE9 offscreen = nullptr;
HRESULT hr;
bool success;
@ -1594,6 +1597,22 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
tex->GetLevelDesc(level, &desc);
RECT rect = {0, 0, desc.Width, desc.Height};
hr = tex->LockRect(level, &locked, &rect, D3DLOCK_READONLY);
// If it fails, this means it's a render-to-texture, so we have to get creative.
if (FAILED(hr)) {
LPDIRECT3DSURFACE9 renderTarget;
hr = tex->GetSurfaceLevel(level, &renderTarget);
if (SUCCEEDED(hr)) {
hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL);
if (SUCCEEDED(hr)) {
hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
if (SUCCEEDED(hr)) {
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
}
}
}
}
if (SUCCEEDED(hr)) {
GPUDebugBufferFormat fmt;
int pixelSize;
@ -1626,7 +1645,12 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
} else {
success = false;
}
tex->UnlockRect(level);
if (offscreen) {
offscreen->UnlockRect();
offscreen->Release();
} else {
tex->UnlockRect(level);
}
}
tex->Release();
}

View File

@ -43,12 +43,8 @@ static const bool safeDestFactors[16] = {
true, //GE_DSTBLEND_FIXB,
};
static bool IsAlphaTestTriviallyTrue() {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
int alphaTestRef = gstate.getAlphaTestRef();
int alphaTestMask = gstate.getAlphaTestMask();
switch (alphaTestFunc) {
bool IsAlphaTestTriviallyTrue() {
switch (gstate.getAlphaTestFunction()) {
case GE_COMP_NEVER:
return false;
@ -56,36 +52,60 @@ static bool IsAlphaTestTriviallyTrue() {
return true;
case GE_COMP_GEQUAL:
return alphaTestRef == 0;
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
return true; // If alpha is full, it doesn't matter what the ref value is.
return gstate.getAlphaTestRef() == 0;
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
// Speeds up Lumines by a LOT on PowerVR.
case GE_COMP_NOTEQUAL:
if (gstate.getAlphaTestRef() == 255) {
// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
// complicated code to discard the draw or whatnot.
return false;
}
// Fallthrough on purpose
case GE_COMP_GREATER:
{
bool depthTest = gstate.isDepthTestEnabled();
#if 0
// Easy way to check the values in the debugger without ruining && early-out
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool stencilTest = gstate.isStencilTestEnabled();
GEBlendSrcFactor src = gstate.getBlendFuncA();
GEBlendDstFactor dst = gstate.getBlendFuncB();
if (!stencilTest && !depthTest && alphaTestRef == 0 && gstate.isAlphaBlendEnabled() && src == GE_SRCBLEND_SRCALPHA && safeDestFactors[(int)dst])
return true;
return false;
bool depthTest = gstate.isDepthTestEnabled();
GEComparison depthTestFunc = gstate.getDepthTestFunction();
int alphaRef = gstate.getAlphaTestRef();
int blendA = gstate.getBlendFuncA();
bool blendEnabled = gstate.isAlphaBlendEnabled();
int blendB = gstate.getBlendFuncA();
#endif
return (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) || (
(!gstate.isStencilTestEnabled() &&
!gstate.isDepthTestEnabled() &&
gstate.getAlphaTestRef() == 0 &&
gstate.isAlphaBlendEnabled() &&
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
safeDestFactors[(int)gstate.getBlendFuncB()]));
}
case GE_COMP_LEQUAL:
return alphaTestRef == 255;
return gstate.getAlphaTestRef() == 255;
case GE_COMP_EQUAL:
case GE_COMP_LESS:
return false;
default:
return false;
}
}
static bool IsColorTestTriviallyTrue() {
GEComparison colorTestFunc = gstate.getColorTestFunction();
switch (colorTestFunc) {
// Reports whether the current alpha test uses a reference value of 0 together
// with a full 0xFF mask.
bool IsAlphaTestAgainstZero() {
	if (gstate.getAlphaTestRef() != 0)
		return false;
	return gstate.getAlphaTestMask() == 0xFF;
}
bool IsColorTestTriviallyTrue() {
switch (gstate.getColorTestFunction()) {
case GE_COMP_NEVER:
return false;

View File

@ -24,15 +24,12 @@
namespace DX9 {
struct FragmentShaderIDDX9
{
FragmentShaderIDDX9() {d[0] = 0xFFFFFFFF;}
void clear() {d[0] = 0xFFFFFFFF;}
u32 d[1];
bool operator < (const FragmentShaderIDDX9 &other) const
{
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
{
struct FragmentShaderIDDX9 {
FragmentShaderIDDX9() {clear();}
void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;}
u32 d[2];
bool operator < (const FragmentShaderIDDX9 &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
if (d[i] < other.d[i])
return true;
if (d[i] > other.d[i])
@ -40,10 +37,8 @@ struct FragmentShaderIDDX9
}
return false;
}
bool operator == (const FragmentShaderIDDX9 &other) const
{
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
{
bool operator == (const FragmentShaderIDDX9 &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
if (d[i] != other.d[i])
return false;
}
@ -51,9 +46,11 @@ struct FragmentShaderIDDX9
}
};
void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id);
void GenerateFragmentShaderDX9(char *buffer);
bool IsAlphaTestAgainstZero();
bool IsAlphaTestTriviallyTrue();
bool IsColorTestTriviallyTrue();
};

View File

@ -23,6 +23,7 @@
#include "Core/Reporting.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/FramebufferDX9.h"
#include "GPU/Directx9/helper/dx_state.h"
@ -49,15 +50,29 @@ namespace DX9 {
// Try to be prime to other decimation intervals.
#define TEXCACHE_DECIMATION_INTERVAL 13
TextureCacheDX9::TextureCacheDX9() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
// Changes more frequent than this will be considered "frequent" and prevent texture scaling.
#define TEXCACHE_FRAME_CHANGE_FREQUENT 6
#define TEXCACHE_MAX_TEXELS_SCALED (256*256) // Per frame
// Sets up the cache bookkeeping, the temporary decode buffers and the two CLUT buffers.
TextureCacheDX9::TextureCacheDX9()
	: clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(nullptr), clutMaxBytes_(0), texelsScaledThisFrame_(0) {
	lastBoundTexture = INVALID_TEX;
	timesInvalidatedAllThisFrame_ = 0;
	decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
	maxAnisotropyLevel = 16;

	// Together these are 5MB of temporary storage. Might be possible to shrink it.
	const int scratchTexels = 1024 * 512;
	tmpTexBuf32.resize(scratchTexels);         // 2MB
	tmpTexBuf16.resize(scratchTexels);         // 1MB
	tmpTexBufRearrange.resize(scratchTexels);  // 2MB

	// Aren't these way too big? (16KB each.)
	const size_t clutBytes = 4096 * sizeof(u32);
	clutBufConverted_ = (u32 *)AllocateAlignedMemory(clutBytes, 16);
	clutBufRaw_ = (u32 *)AllocateAlignedMemory(clutBytes, 16);

	// Zero both so that reads from never-loaded parts of the CLUT give the same
	// result in release and debug builds.
	memset(clutBufConverted_, 0, clutBytes);
	memset(clutBufRaw_, 0, clutBytes);

	SetupTextureDecoder();
}
@ -85,6 +100,21 @@ void TextureCacheDX9::Clear(bool delete_them) {
cache.clear();
secondCache.clear();
}
fbTexInfo_.clear();
}
// Releases the texture owned by the given cache entry, drops any framebuffer
// attachment info recorded for its address, and removes the entry from the cache.
void TextureCacheDX9::DeleteTexture(TexCache::iterator it) {
	auto &doomed = it->second;
	doomed.ReleaseTexture();

	// Must be looked up before the entry is erased below, since the key is its address.
	auto attachment = fbTexInfo_.find(doomed.addr);
	if (attachment != fbTexInfo_.end())
		fbTexInfo_.erase(attachment);

	cache.erase(it);
}
// Flags the texture parameters as changed and resets the last-bound texture
// to INVALID_TEX.
void TextureCacheDX9::ForgetLastTexture() {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
	lastBoundTexture = INVALID_TEX;
}
// Removes old textures.
@ -100,8 +130,7 @@ void TextureCacheDX9::Decimate() {
int killAge = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) {
if (iter->second.lastFrame + killAge < gpuStats.numFlips) {
iter->second.ReleaseTexture();
cache.erase(iter++);
DeleteTexture(iter++);
} else {
++iter;
}
@ -131,16 +160,19 @@ void TextureCacheDX9::Invalidate(u32 addr, int size, GPUInvalidationType type) {
// They could invalidate inside the texture, let's just give a bit of leeway.
const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
u64 startKey = addr - LARGEST_TEXTURE_SIZE;
u64 endKey = addr + size + LARGEST_TEXTURE_SIZE;
const u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32;
u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32;
if (endKey < startKey) {
endKey = (u64)-1;
}
for (TexCache::iterator iter = cache.lower_bound(startKey), end = cache.upper_bound(endKey); iter != end; ++iter) {
u32 texAddr = iter->second.addr;
u32 texEnd = iter->second.addr + iter->second.sizeInRAM;
if (texAddr < addr_end && addr < texEnd) {
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
// Clear status -> STATUS_HASHING.
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
if (type != GPU_INVALIDATE_ALL) {
gpuStats.numTextureInvalidations++;
@ -160,10 +192,14 @@ void TextureCacheDX9::InvalidateAll(GPUInvalidationType /*unused*/) {
return;
}
if (timesInvalidatedAllThisFrame_ > 5) {
return;
}
timesInvalidatedAllThisFrame_++;
for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) {
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
// Clear status -> STATUS_HASHING.
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
if (!iter->second.framebuffer) {
iter->second.invalidHint++;
@ -176,68 +212,144 @@ void TextureCacheDX9::ClearNextFrame() {
}
template <typename T>
inline void AttachFramebufferValid(T &entry, VirtualFramebufferDX9 *framebuffer) {
void TextureCacheDX9::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo) {
const bool hasInvalidFramebuffer = entry->framebuffer == 0 || entry->invalidHint == -1;
const bool hasOlderFramebuffer = entry->framebuffer != 0 && entry->framebuffer->last_frame_render < framebuffer->last_frame_render;
if (hasInvalidFramebuffer || hasOlderFramebuffer) {
bool hasFartherFramebuffer = false;
if (!hasInvalidFramebuffer && !hasOlderFramebuffer) {
// If it's valid, but the offset is greater, then we still win.
if (fbTexInfo_[entry->addr].yOffset == fbInfo.yOffset)
hasFartherFramebuffer = fbTexInfo_[entry->addr].xOffset > fbInfo.xOffset;
else
hasFartherFramebuffer = fbTexInfo_[entry->addr].yOffset > fbInfo.yOffset;
}
if (hasInvalidFramebuffer || hasOlderFramebuffer || hasFartherFramebuffer) {
entry->framebuffer = framebuffer;
entry->invalidHint = 0;
entry->status &= ~TextureCacheDX9::TexCacheEntry::STATUS_DEPALETTIZE;
fbTexInfo_[entry->addr] = fbInfo;
framebuffer->last_frame_attached = gpuStats.numFlips;
host->GPUNotifyTextureAttachment(entry->addr);
} else if (entry->framebuffer == framebuffer) {
framebuffer->last_frame_attached = gpuStats.numFlips;
}
}
template <typename T>
inline void AttachFramebufferInvalid(T &entry, VirtualFramebufferDX9 *framebuffer) {
// Attaches `framebuffer` to `entry` but marks the attachment as invalid
// (invalidHint == -1), recording `fbInfo` for the entry's address.
// Does nothing when the entry is already attached to a different framebuffer.
void TextureCacheDX9::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo) {
	const bool unattached = entry->framebuffer == 0;
	if (!unattached && entry->framebuffer != framebuffer)
		return;

	entry->framebuffer = framebuffer;
	entry->invalidHint = -1;
	// Clear any depalettize flag left on the entry.
	entry->status &= ~TextureCacheDX9::TexCacheEntry::STATUS_DEPALETTIZE;
	fbTexInfo_[entry->addr] = fbInfo;
	host->GPUNotifyTextureAttachment(entry->addr);
}
inline void TextureCacheDX9::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, bool exactMatch) {
bool TextureCacheDX9::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, u32 texaddrOffset) {
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
AttachedFramebufferInfo fbInfo = {0};
const u64 mirrorMask = 0x00600000;
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
const u32 addr = (address | 0x04000000) & 0x3FFFFFFF & ~mirrorMask;
const u32 texaddr = ((entry->addr + texaddrOffset) & ~mirrorMask);
const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry->format < 4;
const u32 h = 1 << ((entry->dim >> 8) & 0xf);
// 512 on a 272 framebuffer is sane, so let's be lenient.
const u32 minSubareaHeight = h / 4;
// If they match exactly, it's non-CLUT and from the top left.
if (exactMatch) {
// Apply to non-buffered and buffered mode only.
if (!(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE))
return;
return false;
DEBUG_LOG(G3D, "Render to texture detected at %08x!", address);
if (!entry->framebuffer || entry->invalidHint == -1) {
if (entry->format != framebuffer->format) {
WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format);
// If it already has one, let's hope that one is correct.
AttachFramebufferInvalid(entry, framebuffer);
} else {
AttachFramebufferValid(entry, framebuffer);
if (framebuffer->fb_stride != entry->bufw) {
WARN_LOG_REPORT_ONCE(diffStrides1, G3D, "Render to texture with different strides %d != %d", entry->bufw, framebuffer->fb_stride);
}
if (entry->format != framebuffer->format) {
WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format);
// Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory.
// However, some games use a different format to clear the buffer.
if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) {
DetachFramebuffer(entry, address, framebuffer);
}
// TODO: Delete the original non-fbo texture too.
} else {
AttachFramebufferValid(entry, framebuffer, fbInfo);
return true;
}
} else {
// Apply to buffered mode only.
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE))
return;
return false;
// 3rd Birthday (and possibly other games) render to a 16 bit clut texture.
const bool compatFormat = framebuffer->format == entry->format
|| (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32)
|| (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
const bool clutFormat =
(framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) ||
(framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
// Is it at least the right stride?
if (framebuffer->fb_stride == entry->bufw && compatFormat) {
if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
// TODO: Use an FBO to translate the palette?
AttachFramebufferValid(entry, framebuffer);
} else if ((entry->addr - address) / entry->bufw < framebuffer->height) {
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address);
// TODO: Keep track of the y offset.
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
AttachFramebufferInvalid(entry, framebuffer);
const u32 bitOffset = (texaddr - addr) * 8;
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]);
fbInfo.yOffset = pixelOffset / entry->bufw;
fbInfo.xOffset = pixelOffset % entry->bufw;
if (framebuffer->fb_stride != entry->bufw) {
if (noOffset) {
WARN_LOG_REPORT_ONCE(diffStrides2, G3D, "Render to texture using CLUT with different strides %d != %d", entry->bufw, framebuffer->fb_stride);
} else {
// Assume any render-to-tex with different bufw + offset is a render from ram.
DetachFramebuffer(entry, address, framebuffer);
return false;
}
}
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
// Can't be inside the framebuffer then, ram. Detach to be safe.
DetachFramebuffer(entry, address, framebuffer);
return false;
}
// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible render to texture at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
DetachFramebuffer(entry, address, framebuffer);
return false;
}
// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
if (clutFormat) {
if (!noOffset) {
WARN_LOG_REPORT_ONCE(subareaClut, G3D, "Render to texture using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
}
AttachFramebufferValid(entry, framebuffer, fbInfo);
entry->status |= TexCacheEntry::STATUS_DEPALETTIZE;
// We'll validate it compiles later.
return true;
} else if (entry->format == GE_TFMT_CLUT8 || entry->format == GE_TFMT_CLUT4) {
ERROR_LOG_REPORT_ONCE(fourEightBit, G3D, "4 and 8-bit CLUT format not supported for framebuffers");
}
// This is either normal or we failed to generate a shader to depalettize
if (framebuffer->format == entry->format || clutFormat) {
if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
AttachFramebufferValid(entry, framebuffer, fbInfo);
return true;
} else {
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
AttachFramebufferInvalid(entry, framebuffer, fbInfo);
return true;
}
} else {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with incompatible formats %d != %d at %08x", entry->format, framebuffer->format, address);
}
}
return false;
}
inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer) {
@ -248,14 +360,18 @@ inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address
}
void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *framebuffer, FramebufferNotification msg) {
// This is a rough heuristic, because sometimes our framebuffers are too tall.
static const u32 MAX_SUBAREA_Y_OFFSET = 32;
// Must be in VRAM so | 0x04000000 it is.
const u64 cacheKey = (u64)(address | 0x04000000) << 32;
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
// These checks are mainly to reduce scanning all textures.
const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
const u64 cacheKey = (u64)addr << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * framebuffer->height * bpp) << 32);
// The first mirror starts at 0x04200000 and there are 3. We search all for framebuffers.
const u64 mirrorCacheKey = (u64)0x04200000 << 32;
const u64 mirrorCacheKeyEnd = (u64)0x04800000 << 32;
switch (msg) {
case NOTIFY_FB_CREATED:
@ -265,14 +381,21 @@ void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *fram
fbCache_.push_back(framebuffer);
}
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
AttachFramebuffer(&it->second, address | 0x04000000, framebuffer, it->first == cacheKey);
AttachFramebuffer(&it->second, addr, framebuffer);
}
// Let's assume anything in mirrors is fair game to check.
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
AttachFramebuffer(&it->second, addr, framebuffer);
}
break;
case NOTIFY_FB_DESTROYED:
fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
DetachFramebuffer(&it->second, address | 0x04000000, framebuffer);
DetachFramebuffer(&it->second, addr, framebuffer);
}
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
DetachFramebuffer(&it->second, addr, framebuffer);
}
break;
}
@ -480,40 +603,80 @@ static const u8 MagFilt[2] = {
D3DTEXF_LINEAR
};
void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
int minFilt = gstate.texfilter & 0x7;
int magFilt = (gstate.texfilter>>8) & 1;
bool sClamp = gstate.isTexCoordClampedS();
bool tClamp = gstate.isTexCoordClampedT();
// Always force !!
force = true;
void TextureCacheDX9::GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, int maxLevel) {
minFilt = gstate.texfilter & 0x7;
magFilt = (gstate.texfilter>>8) & 1;
sClamp = gstate.isTexCoordClampedS();
tClamp = gstate.isTexCoordClampedT();
bool noMip = (gstate.texlevel & 0xFFFFFF) == 0x000001 || (gstate.texlevel & 0xFFFFFF) == 0x100001 ; // Fix texlevel at 0
float lodBias = 0.0;
if (entry.maxLevel == 0) {
if (maxLevel == 0) {
// Enforce no mip filtering, for safety.
minFilt &= 1; // no mipmaps yet
lodBias = 0.0f;
} else {
// Texture lod bias should be signed.
lodBias = (float)(int)(s8)((gstate.texlevel >> 16) & 0xFF) / 16.0f;
}
if ((g_Config.iTexFiltering == LINEAR || (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos)) && !gstate.isColorTestEnabled()) {
if (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos > 0 && (gstate.getTextureDimension(0) & 0xF) >= 9) {
magFilt |= 1;
minFilt |= 1;
}
if (g_Config.iTexFiltering == NEAREST) {
if (g_Config.iTexFiltering == LINEAR && (!gstate.isColorTestEnabled() || IsColorTestTriviallyTrue())) {
// TODO: IsAlphaTestTriviallyTrue() is unsafe here. vertexFullAlpha is not calculated yet.
if (!gstate.isAlphaTestEnabled() || IsAlphaTestTriviallyTrue()) {
magFilt |= 1;
minFilt |= 1;
}
}
bool forceNearest = g_Config.iTexFiltering == NEAREST;
// Force Nearest when color test enabled and rendering resolution greater than 480x272
if ((gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue()) && g_Config.iInternalResolution != 1 && gstate.isModeThrough()) {
// Some games use 0 as the color test color, which won't be too bad if it bleeds.
// Fuchsia and green, etc. are the problem colors.
if (gstate.getColorTestRef() != 0) {
forceNearest = true;
}
}
if (forceNearest) {
magFilt &= ~1;
minFilt &= ~1;
}
if (!g_Config.bMipMap || noMip) {
magFilt &= 1;
minFilt &= 1;
}
}
void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
int minFilt;
int magFilt;
bool sClamp;
bool tClamp;
float lodBias;
GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, entry.maxLevel);
if (entry.maxLevel != 0) {
if (force || entry.lodBias != lodBias) {
#ifndef USING_GLES2
GETexLevelMode mode = gstate.getTexLevelMode();
switch (mode) {
case GE_TEXLEVEL_MODE_AUTO:
// TODO
break;
case GE_TEXLEVEL_MODE_CONST:
// TODO
break;
case GE_TEXLEVEL_MODE_SLOPE:
// TODO
break;
}
#endif
entry.lodBias = lodBias;
}
}
dxstate.texMinFilter.set(MinFilt[minFilt]);
dxstate.texMipFilter.set(MipFilt[minFilt]);
@ -523,8 +686,38 @@ void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
dxstate.texAddressV.set(tClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
}
// Applies sampler state for a framebuffer bound as a texture.
// Filters are always set; the U/V address modes are only set when the
// framebuffer dimensions equal the texture dimensions from gstate.
void TextureCacheDX9::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
	int minFilter;
	int magFilter;
	bool clampS;
	bool clampT;
	float ignoredLodBias;
	// maxLevel is passed as 0, so GetSamplingParams strips mip filtering and zeroes the bias.
	GetSamplingParams(minFilter, magFilter, clampS, clampT, ignoredLodBias, 0);

	dxstate.texMinFilter.set(MinFilt[minFilter]);
	dxstate.texMipFilter.set(MipFilt[minFilter]);
	dxstate.texMagFilter.set(MagFilt[magFilter]);

	// Often the framebuffer will not match the texture size. In that case the
	// wrap/clamp is done in the shader, so the address modes are left untouched.
	const int texWidth = gstate.getTextureWidth(0);
	const int texHeight = gstate.getTextureHeight(0);
	if (texWidth == bufferWidth && texHeight == bufferHeight) {
		dxstate.texAddressU.set(clampS ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
		dxstate.texAddressV.set(clampT ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
	}
}
void TextureCacheDX9::StartFrame() {
lastBoundTexture = INVALID_TEX;
timesInvalidatedAllThisFrame_ = 0;
if (texelsScaledThisFrame_) {
// INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
}
texelsScaledThisFrame_ = 0;
if (clearCacheNextFrame_) {
Clear(true);
clearCacheNextFrame_ = false;
@ -540,7 +733,6 @@ static inline u32 MiniHash(const u32 *ptr) {
// Hashes a texture's data straight from emulated memory.
//   addr   - emulated address of the texture data.
//   bufw   - buffer width (stride) in texels; used for the size, not `w`.
//   w      - texture width; not used by the hash itself.
//   h      - texture height in rows.
//   format - texture format, used to look up the bits per pixel.
// Returns the result of DoQuickTexHash over (bits per pixel * bufw * h) / 8 bytes.
static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat format) {
	const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8;
	const u32 *checkp = (const u32 *) Memory::GetPointer(addr);
	return DoQuickTexHash(checkp, sizeInRAM);
}
@ -551,14 +743,29 @@ inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int ma
// Copies the CLUT named by gstate into clutBufRaw_ and records its size in
// clutTotalBytes_. If the CLUT address is not valid, the buffer is filled with
// 0xFF bytes instead. Also resets clutLastFormat_ so the converted CLUT is
// rebuilt on next use, and updates the running maximum in clutMaxBytes_.
void TextureCacheDX9::LoadClut() {
	u32 clutAddr = gstate.getClutAddress();
	// clutTotalBytes_ is assigned on every path below before it is read.
	if (Memory::IsValidAddress(clutAddr)) {
#ifdef _M_SSE
		// Copy in 32-byte blocks: two 16-byte vectors per block.
		int numBlocks = gstate.getClutLoadBlocks();
		clutTotalBytes_ = numBlocks * 32;
		const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
		__m128i *dest = (__m128i *)clutBufRaw_;
		for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
			// The source uses unaligned loads; the destination uses aligned stores
			// (clutBufRaw_ is allocated with 16-byte alignment in the constructor).
			__m128i data1 = _mm_loadu_si128(source);
			__m128i data2 = _mm_loadu_si128(source + 1);
			_mm_store_si128(dest, data1);
			_mm_store_si128(dest + 1, data2);
		}
#else
		clutTotalBytes_ = gstate.getClutLoadBytes();
		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
#endif
	} else {
		clutTotalBytes_ = gstate.getClutLoadBytes();
		memset(clutBufRaw_, 0xFF, clutTotalBytes_);
	}
	// Reload the clut next time.
	clutLastFormat_ = 0xFFFFFFFF;
	clutMaxBytes_ = std::max(clutMaxBytes_, clutTotalBytes_);
}
void TextureCacheDX9::UpdateCurrentClut() {
@ -576,7 +783,7 @@ void TextureCacheDX9::UpdateCurrentClut() {
clutAlphaLinear_ = false;
clutAlphaLinearColor_ = 0;
if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) {
const u16_le *clut = (const u16_le*)GetCurrentClut<u16>();
const u16_le *clut = GetCurrentClut<u16_le>();
clutAlphaLinear_ = true;
clutAlphaLinearColor_ = clut[15] & 0xFFF0;
for (int i = 0; i < 16; ++i) {
@ -585,6 +792,7 @@ void TextureCacheDX9::UpdateCurrentClut() {
break;
}
// Alpha 0 doesn't matter.
// TODO: Well, depending on blend mode etc, it can actually matter, although unlikely.
if (i != 0 && (clut[i] >> 12) != clutAlphaLinearColor_) {
clutAlphaLinear_ = false;
break;
@ -604,11 +812,13 @@ inline u32 TextureCacheDX9::GetCurrentClutHash() {
return clutHash_;
}
void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry)
{
entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE;
void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer) {
_dbg_assert_msg_(G3D, framebuffer != nullptr, "Framebuffer must not be null.");
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
if (useBufferedRendering) {
// TODO: Depal
// For now, let's not bind FBOs that we know are off (invalidHint will be -1.)
// But let's still not use random memory.
if (entry->framebuffer->fbo) {
@ -620,20 +830,69 @@ void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry)
pD3Ddevice->SetTexture(0, NULL);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
}
gstate_c.textureFullAlpha = framebuffer->format == GE_FORMAT_565;
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
// Keep the framebuffer alive.
framebuffer->last_frame_used = gpuStats.numFlips;
// We need to force it, since we may have set it on a texture before attaching.
UpdateSamplingParams(*entry, true);
gstate_c.curTextureWidth = entry->framebuffer->width;
gstate_c.curTextureHeight = entry->framebuffer->height;
gstate_c.curTextureWidth = framebuffer->bufferWidth;
gstate_c.curTextureHeight = framebuffer->bufferHeight;
gstate_c.flipTexture = false;
gstate_c.bgraTexture = false;
gstate_c.textureFullAlpha = entry->framebuffer->format == GE_FORMAT_565;
gstate_c.curTextureXOffset = fbTexInfo_[entry->addr].xOffset;
gstate_c.curTextureYOffset = fbTexInfo_[entry->addr].yOffset;
gstate_c.needShaderTexClamp = gstate_c.curTextureWidth != (u32)gstate.getTextureWidth(0) || gstate_c.curTextureHeight != (u32)gstate.getTextureHeight(0);
if (gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0) {
gstate_c.needShaderTexClamp = true;
}
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
} else {
if (entry->framebuffer->fbo)
entry->framebuffer->fbo = 0;
if (framebuffer->fbo) {
fbo_destroy(framebuffer->fbo);
framebuffer->fbo = 0;
}
pD3Ddevice->SetTexture(0, NULL);
gstate_c.needShaderTexClamp = false;
}
}
// Tries to bind a framebuffer as the current texture, treating the texture's
// address as shifted by `offset` when matching against known framebuffers.
// Only runs in buffered rendering mode and only for a texture already in the cache.
// Returns true when a framebuffer was attached and bound, false otherwise.
bool TextureCacheDX9::SetOffsetTexture(u32 offset) {
	if (g_Config.iRenderingMode != FB_BUFFERED_MODE)
		return false;

	const u32 texaddr = gstate.getTextureAddress(0);
	const bool addressesValid = Memory::IsValidAddress(texaddr) && Memory::IsValidAddress(texaddr + offset);
	if (!addressesValid)
		return false;

	const u64 cachekey = (u64)(texaddr & 0x3FFFFFFF) << 32;
	TexCache::iterator found = cache.find(cachekey);
	if (found == cache.end())
		return false;
	TexCacheEntry *entry = &found->second;

	// Offer the entry to every known framebuffer; each call may attach or detach.
	bool attachedAny = false;
	const size_t fbCount = fbCache_.size();
	for (size_t idx = 0; idx < fbCount; ++idx) {
		auto candidate = fbCache_[idx];
		if (AttachFramebuffer(entry, candidate->fb_address, candidate, offset))
			attachedAny = true;
	}

	if (!attachedAny || !entry->framebuffer)
		return false;

	SetTextureFramebuffer(entry, entry->framebuffer);
	lastBoundTexture = INVALID_TEX;
	entry->lastFrame = gpuStats.numFlips;
	return true;
}
void TextureCacheDX9::SetTexture(bool force) {
#ifdef DEBUG_TEXTURES
if (SetDebugTexture()) {
@ -689,6 +948,7 @@ void TextureCacheDX9::SetTexture(bool force) {
TexCache::iterator iter = cache.find(cachekey);
TexCacheEntry *entry = NULL;
gstate_c.flipTexture = false;
gstate_c.needShaderTexClamp = false;
gstate_c.bgraTexture = true;
gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
@ -703,7 +963,7 @@ void TextureCacheDX9::SetTexture(bool force) {
// Check for FBO - slow!
if (entry->framebuffer) {
if (match) {
SetTextureFramebuffer(entry);
SetTextureFramebuffer(entry, entry->framebuffer);
lastBoundTexture = INVALID_TEX;
entry->lastFrame = gpuStats.numFlips;
return;
@ -714,9 +974,19 @@ void TextureCacheDX9::SetTexture(bool force) {
}
}
bool rehash = (entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE;
bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE;
bool doDelete = true;
// First let's see if another texture with the same address had a hashfail.
if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) {
// Always rehash in this case, if one changed the rest all probably did.
rehash = true;
entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK;
} else if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) {
// Okay, just some parameter change - the data didn't change, no need to rehash.
rehash = false;
}
if (match) {
if (entry->lastFrame != gpuStats.numFlips) {
u32 diff = gpuStats.numFlips - entry->lastFrame;
@ -737,7 +1007,7 @@ void TextureCacheDX9::SetTexture(bool force) {
}
// If it's not huge or has been invalidated many times, recheck the whole texture.
if (entry->invalidHint > 180 || (entry->invalidHint > 15 && dim <= 0x909)) {
if (entry->invalidHint > 180 || (entry->invalidHint > 15 && (dim >> 8) < 9 && (dim & 0xF) < 9)) {
entry->invalidHint = 0;
rehash = true;
}
@ -749,21 +1019,25 @@ void TextureCacheDX9::SetTexture(bool force) {
rehash = false;
}
if (rehash && (entry->status & TexCacheEntry::STATUS_MASK) != TexCacheEntry::STATUS_RELIABLE) {
if (rehash && entry->GetHashStatus() != TexCacheEntry::STATUS_RELIABLE) {
fullhash = QuickTexHash(texaddr, bufw, w, h, format);
if (fullhash != entry->fullhash) {
hashFail = true;
} else if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
} else if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
// Reset to STATUS_HASHING.
if (g_Config.bTextureBackoffCache) {
entry->status &= ~TexCacheEntry::STATUS_MASK;
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
}
}
if (hashFail) {
match = false;
entry->status |= TexCacheEntry::STATUS_UNRELIABLE;
if (entry->numFrames < TEXCACHE_FRAME_CHANGE_FREQUENT) {
entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
}
entry->numFrames = 0;
// Don't give up just yet. Let's try the secondary cache if it's been invalidated before.
@ -783,7 +1057,7 @@ void TextureCacheDX9::SetTexture(bool force) {
match = true;
}
} else {
secondKey = entry->fullhash | (u64)entry->cluthash << 32;
secondKey = entry->fullhash | ((u64)entry->cluthash << 32);
secondCache[secondKey] = *entry;
doDelete = false;
}
@ -792,6 +1066,11 @@ void TextureCacheDX9::SetTexture(bool force) {
}
}
if (match && (entry->status & TexCacheEntry::STATUS_TO_SCALE) && g_Config.iTexScalingLevel != 1 && texelsScaledThisFrame_ < TEXCACHE_MAX_TEXELS_SCALED) {
// INFO_LOG(G3D, "Reloading texture to do the scaling we skipped..");
match = false;
}
if (match) {
// TODO: Mark the entry reliable if it's been safe for long enough?
//got one!
@ -799,7 +1078,8 @@ void TextureCacheDX9::SetTexture(bool force) {
if (entry->texture != lastBoundTexture) {
pD3Ddevice->SetTexture(0, entry->texture);
lastBoundTexture = entry->texture;
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN;
}
UpdateSamplingParams(*entry, false);
VERBOSE_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr);
@ -820,8 +1100,20 @@ void TextureCacheDX9::SetTexture(bool force) {
entry->ReleaseTexture();
}
}
if (entry->status == TexCacheEntry::STATUS_RELIABLE) {
entry->status = TexCacheEntry::STATUS_HASHING;
// Clear the reliable bit if set.
if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
}
// Also, mark any textures with the same address but different clut. They need rechecking.
if (cluthash != 0) {
const u64 cachekeyMin = (u64)(texaddr & 0x3FFFFFFF) << 32;
const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
for (auto it = cache.lower_bound(cachekeyMin), end = cache.upper_bound(cachekeyMax); it != end; ++it) {
if (it->second.cluthash != cluthash) {
it->second.status |= TexCacheEntry::STATUS_CLUT_RECHECK;
}
}
}
}
} else {
@ -848,6 +1140,7 @@ void TextureCacheDX9::SetTexture(bool force) {
entry->lastFrame = gpuStats.numFlips;
entry->framebuffer = 0;
entry->maxLevel = maxLevel;
entry->lodBias = 0.0f;
entry->dim = gstate.getTextureDimension(0);
entry->bufw = bufw;
@ -870,29 +1163,19 @@ void TextureCacheDX9::SetTexture(bool force) {
// Before we go reading the texture from memory, let's check for render-to-texture.
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
// This is a rough heuristic, because sometimes our framebuffers are too tall.
static const u32 MAX_SUBAREA_Y_OFFSET = 32;
// Must be in VRAM so | 0x04000000 it is.
const u64 cacheKeyStart = (u64)(framebuffer->fb_address | 0x04000000) << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
const u64 cacheKeyEnd = cacheKeyStart + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);
if (cachekey >= cacheKeyStart && cachekey < cacheKeyEnd) {
AttachFramebuffer(entry, framebuffer->fb_address | 0x04000000, framebuffer, cachekey == cacheKeyStart);
}
AttachFramebuffer(entry, framebuffer->fb_address, framebuffer);
}
// If we ended up with a framebuffer, attach it - no texture decoding needed.
if (entry->framebuffer) {
SetTextureFramebuffer(entry);
SetTextureFramebuffer(entry, entry->framebuffer);
lastBoundTexture = INVALID_TEX;
entry->lastFrame = gpuStats.numFlips;
return;
}
// Adjust maxLevel to actually present levels..
bool badMipSizes = false;
for (int i = 0; i <= maxLevel; i++) {
// If encountering levels pointing to nothing, adjust max level.
u32 levelTexaddr = gstate.getTextureAddress(i);
@ -900,10 +1183,57 @@ void TextureCacheDX9::SetTexture(bool force) {
maxLevel = i - 1;
break;
}
if (i > 0) {
int tw = gstate.getTextureWidth(i);
int th = gstate.getTextureHeight(i);
if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1))
badMipSizes = true;
else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1))
badMipSizes = true;
}
}
LoadTextureLevel(*entry, 0, replaceImages);
// In addition, simply don't load more than level 0 if g_Config.bMipMap is false.
if (!g_Config.bMipMap) {
maxLevel = 0;
}
// Pick the destination D3D format up front so it can be handed to LoadTextureLevel().
D3DFORMAT dstFmt = GetDestFormat(format, gstate.getClutPaletteFormat());
int scaleFactor;
// Auto texture scaling: follow the rendering resolution, capped at 4x (3x is reduced to 2x).
if (g_Config.iTexScalingLevel == 0) {
scaleFactor = g_Config.iInternalResolution;
if (scaleFactor == 0) {
scaleFactor = (PSP_CoreParameter().renderWidth + 479) / 480;
}
scaleFactor = std::min(4, scaleFactor);
if (scaleFactor == 3) {
scaleFactor = 2;
}
} else {
scaleFactor = g_Config.iTexScalingLevel;
}
// Don't scale the PPGe texture.
if (entry->addr > 0x05000000 && entry->addr < 0x08800000)
scaleFactor = 1;
if (scaleFactor != 1 && (entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
scaleFactor = 1;
// INFO_LOG(G3D, "Skipped scaling for now..");
} else {
entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
texelsScaledThisFrame_ += w * h;
}
}
LoadTextureLevel(*entry, 0, replaceImages, scaleFactor, dstFmt);
pD3Ddevice->SetTexture(0, entry->texture);
lastBoundTexture = entry->texture;
@ -911,15 +1241,42 @@ void TextureCacheDX9::SetTexture(bool force) {
//glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, anisotropyLevel);
pD3Ddevice->SetSamplerState(0, D3DSAMP_MAXANISOTROPY, anisotropyLevel);
UpdateSamplingParams(*entry, true);
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN;
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
UpdateSamplingParams(*entry, true);
}
// Chooses the Direct3D format a texture of the given PSP format is decoded to.
//
// format:     the PSP texture format.
// clutFormat: the palette format; only consulted for the CLUT (indexed) formats.
//
// Returns the palette's destination format for indexed textures, the matching
// 16-bit format for 4444/5551/5650, and D3DFMT_A8R8G8B8 for everything else
// (8888, the DXT formats, and any unrecognized value).
D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
	const bool indexed = format == GE_TFMT_CLUT4 || format == GE_TFMT_CLUT8 ||
		format == GE_TFMT_CLUT16 || format == GE_TFMT_CLUT32;
	if (indexed)
		return getClutDestFormat(clutFormat);

	if (format == GE_TFMT_4444)
		return D3DFMT_A4R4G4B4;
	if (format == GE_TFMT_5551)
		return D3DFMT_A1R5G5B5;
	if (format == GE_TFMT_5650)
		return D3DFMT_R5G6B5;

	// GE_TFMT_8888, GE_TFMT_DXT1/3/5 and anything unknown.
	return D3DFMT_A8R8G8B8;
}
void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt, int *bufwout) {
void *finalBuf = NULL;
u32 texaddr = gstate.getTextureAddress(level);
if (texaddr & 0x00600000 && Memory::IsVRAMAddress(texaddr)) {
// This means it's in a mirror, possibly a swizzled mirror. Let's report.
WARN_LOG_REPORT_ONCE(texmirror, G3D, "Decoding texture from VRAM mirror at %08x swizzle=%d", texaddr, gstate.isTextureSwizzled() ? 1 : 0);
}
int bufw = GetTextureBufw(level, texaddr, format);
if (bufwout)
@ -931,8 +1288,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
switch (format) {
case GE_TFMT_CLUT4:
{
dstFmt = getClutDestFormat(clutformat);
const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
@ -990,19 +1345,16 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
break;
case GE_TFMT_CLUT8:
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = ReadIndexedTex(level, texptr, 1, dstFmt, bufw);
break;
case GE_TFMT_CLUT16:
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = ReadIndexedTex(level, texptr, 2, dstFmt, bufw);
break;
case GE_TFMT_CLUT32:
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
finalBuf = ReadIndexedTex(level, texptr, 4, dstFmt, bufw);
break;
@ -1010,12 +1362,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
case GE_TFMT_4444:
case GE_TFMT_5551:
case GE_TFMT_5650:
if (format == GE_TFMT_4444)
dstFmt = D3DFMT_A4R4G4B4;
else if (format == GE_TFMT_5551)
dstFmt = D3DFMT_A1R5G5B5;
else if (format == GE_TFMT_5650)
dstFmt = D3DFMT_R5G6B5;
texByteAlign = 2;
if (!gstate.isTextureSwizzled()) {
@ -1032,7 +1378,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
break;
case GE_TFMT_8888:
dstFmt = D3DFMT_A8R8G8B8;
if (!gstate.isTextureSwizzled()) {
// Special case: if we don't need to deal with packing, we don't need to copy.
//if (w == bufw) {
@ -1045,15 +1390,13 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
Memory::Memcpy(tmpTexBuf32.data(), texaddr, len * sizeof(u32));
finalBuf = tmpTexBuf32.data();
}
}
else {
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
finalBuf = UnswizzleFromMem(texptr, bufw, 4, level);
}
break;
case GE_TFMT_DXT1:
dstFmt = D3DFMT_A8R8G8B8;
{
int minw = std::min(bufw, w);
tmpTexBuf32.resize(std::max(bufw, w) * h);
@ -1074,7 +1417,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
break;
case GE_TFMT_DXT3:
dstFmt = D3DFMT_A8R8G8B8;
{
int minw = std::min(bufw, w);
tmpTexBuf32.resize(std::max(bufw, w) * h);
@ -1094,8 +1436,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
}
break;
case GE_TFMT_DXT5: // These work fine now
dstFmt = D3DFMT_A8R8G8B8;
case GE_TFMT_DXT5:
{
int minw = std::min(bufw, w);
tmpTexBuf32.resize(std::max(bufw, w) * h);
@ -1246,13 +1587,10 @@ static inline void copyTexture(int xoffset, int yoffset, int w, int h, int pitch
}
}
void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages) {
void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, u32 dstFmt) {
// TODO: only do this once
u32 texByteAlign = 1;
// TODO: Look into using BGRA for 32-bit textures when the GL_EXT_texture_format_BGRA8888 extension is available, as it's faster than RGBA on some chips.
u32 dstFmt = 0;
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
int bufw;
void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt, &bufw);
@ -1266,26 +1604,19 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool rep
gpuStats.numTexturesDecoded++;
u32 *pixelData = (u32 *)finalBuf;
int scaleFactor = g_Config.iTexScalingLevel;
// Don't scale the PPGe texture.
if (entry.addr > 0x05000000 && entry.addr < 0x08800000)
scaleFactor = 1;
if (scaleFactor > 1 && entry.numInvalidated == 0)
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
// Or always?
if (entry.numInvalidated == 0) {
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, bufw, w, h);
entry.status = ((entry.status & ~TexCacheEntry::STATUS_ALPHA_MASK) | alphaStatus);
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, w, w, h);
entry.SetAlphaStatus(alphaStatus, level);
} else {
entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN;
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
}
// Ignore mip map atm
if (level == 0) {
if (replaceImages) {
if (replaceImages) {
// Unset texture
pD3Ddevice->SetTexture(0, NULL);

View File

@ -27,6 +27,8 @@
namespace DX9 {
struct VirtualFramebufferDX9;
class FramebufferManagerDX9;
class ShaderManagerDX9;
enum TextureFiltering {
AUTO = 1,
@ -41,13 +43,13 @@ enum FramebufferNotification {
NOTIFY_FB_DESTROYED,
};
class TextureCacheDX9
{
class TextureCacheDX9 {
public:
TextureCacheDX9();
~TextureCacheDX9();
void SetTexture(bool t = false);
void SetTexture(bool force = false);
bool SetOffsetTexture(u32 offset);
void Clear(bool delete_them);
void StartFrame();
@ -60,6 +62,13 @@ public:
// are being rendered to. This is barebones so far.
void NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *framebuffer, FramebufferNotification msg);
// Records the framebuffer manager pointer in framebufferManager_.
void SetFramebufferManager(FramebufferManagerDX9 *fbManager) {
framebufferManager_ = fbManager;
}
// Records the shader manager pointer in shaderManager_.
void SetShaderManager(ShaderManagerDX9 *sm) {
shaderManager_ = sm;
}
// Number of entries currently in the primary texture cache map
// (entries in secondCache are not counted).
size_t NumLoadedTextures() const {
return cache.size();
}
@ -67,7 +76,8 @@ public:
// Only used by Qt UI?
bool DecodeTexture(u8 *output, GPUgstate state);
private:
void ForgetLastTexture();
// Wow this is starting to grow big. Soon need to start looking at resizing it.
// Must stay a POD.
struct TexCacheEntry {
@ -76,14 +86,19 @@ private:
enum Status {
STATUS_HASHING = 0x00,
STATUS_RELIABLE = 0x01, // cache, don't hash
STATUS_UNRELIABLE = 0x02, // never cache
STATUS_RELIABLE = 0x01, // Don't bother rehashing.
STATUS_UNRELIABLE = 0x02, // Always recheck hash.
STATUS_MASK = 0x03,
STATUS_ALPHA_UNKNOWN = 0x04,
STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.
STATUS_ALPHA_SIMPLE = 0x08, // Like above, but also has 0 alpha (e.g. 5551.)
STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.
STATUS_ALPHA_SIMPLE = 0x08, // Like above, but also has 0 alpha (e.g. 5551.)
STATUS_ALPHA_MASK = 0x0c,
STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.)
STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.
STATUS_DEPALETTIZE = 0x40, // Needs to go through a depalettize pass.
STATUS_TO_SCALE = 0x80, // Pending texture scaling in a later frame.
};
// Status, but int so we can zero initialize.
@ -99,12 +114,33 @@ private:
u8 format;
u16 dim;
u16 bufw;
LPDIRECT3DTEXTURE9 texture; //GLuint
LPDIRECT3DTEXTURE9 texture;
int invalidHint;
u32 fullhash;
u32 cluthash;
int maxLevel;
float lodBias;
// Extracts the hash-tracking state: the bits of status covered by STATUS_MASK.
Status GetHashStatus() {
	const auto hashBits = status & STATUS_MASK;
	return static_cast<Status>(hashBits);
}
// Replaces the hash-tracking bits (STATUS_MASK) with newStatus, leaving all
// other status bits untouched.
void SetHashStatus(Status newStatus) {
	status &= ~STATUS_MASK;
	status |= newStatus;
}
// Extracts the alpha state: the bits of status covered by STATUS_ALPHA_MASK.
Status GetAlphaStatus() {
	const auto alphaBits = status & STATUS_ALPHA_MASK;
	return static_cast<Status>(alphaBits);
}
// Replaces the alpha bits (STATUS_ALPHA_MASK) with newStatus, leaving all
// other status bits untouched.
void SetAlphaStatus(Status newStatus) {
	status &= ~STATUS_ALPHA_MASK;
	status |= newStatus;
}
// Level-aware variant of SetAlphaStatus().
//
// Level 0, or an "unknown" result at any level, overwrites the alpha status.
// For other levels the status is only ever downgraded from FULL to SIMPLE;
// any other combination leaves the existing status as it is.
void SetAlphaStatus(Status newStatus, int level) {
	const bool overwrite = level == 0 || newStatus == STATUS_ALPHA_UNKNOWN;
	if (overwrite) {
		SetAlphaStatus(newStatus);
		return;
	}

	// Non-zero level: only move towards the more restrictive state.
	if (newStatus != STATUS_ALPHA_SIMPLE)
		return;
	if (GetAlphaStatus() == STATUS_ALPHA_FULL)
		SetAlphaStatus(STATUS_ALPHA_SIMPLE);
}
bool Matches(u16 dim2, u8 format2, int maxLevel2);
void ReleaseTexture() {
if (texture) {
@ -113,28 +149,44 @@ private:
}
};
void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
private:
typedef std::map<u64, TexCacheEntry> TexCache;
void Decimate(); // Run this once per frame to get rid of old textures.
void DeleteTexture(TexCache::iterator it);
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 bytesPerPixel, u32 level);
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw);
void GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, int maxLevel);
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages);
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, u32 dstFmt);
D3DFORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt, int *bufw = 0);
TexCacheEntry::Status CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
template <typename T>
const T *GetCurrentClut();
u32 GetCurrentClutHash();
void UpdateCurrentClut();
void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, bool exactMatch);
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, u32 texaddrOffset = 0);
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer);
void SetTextureFramebuffer(TexCacheEntry *entry);
void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer);
TexCacheEntry *GetEntryAt(u32 texaddr);
typedef std::map<u64, TexCacheEntry> TexCache;
TexCache cache;
TexCache secondCache;
std::vector<VirtualFramebufferDX9 *> fbCache_;
// Separate to keep main texture cache size down.
// Extra data about a framebuffer attached to a texture entry. Instances are
// stored in fbTexInfo_, and looked up there by the entry's address when the
// framebuffer is bound as a texture.
struct AttachedFramebufferInfo {
// Copied into gstate_c.curTextureXOffset when the framebuffer is bound
// (presumably in pixels - confirm against AttachFramebuffer).
u32 xOffset;
// Copied into gstate_c.curTextureYOffset when the framebuffer is bound
// (presumably in pixels - confirm against AttachFramebuffer).
u32 yOffset;
};
std::map<u32, AttachedFramebufferInfo> fbTexInfo_;
void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo);
void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo);
bool clearCacheNextFrame_;
bool lowMemoryMode_;
TextureScalerDX9 scaler;
@ -150,6 +202,7 @@ private:
u32 *clutBuf_;
u32 clutHash_;
u32 clutTotalBytes_;
u32 clutMaxBytes_;
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
bool clutAlphaLinear_;
u16 clutAlphaLinearColor_;
@ -158,6 +211,13 @@ private:
float maxAnisotropyLevel;
int decimationCounter_;
int texelsScaledThisFrame_;
int timesInvalidatedAllThisFrame_;
FramebufferManagerDX9 *framebufferManager_;
ShaderManagerDX9 *shaderManager_;
};
D3DFORMAT getClutDestFormat(GEPaletteFormat format);
};

View File

@ -61,10 +61,10 @@ namespace {
for(int y = l; y < u; ++y) {
for(int x = 0; x < width; ++x) {
u32 val = data[y*width + x];
u32 r = ((val>>12) & 0xF) * 17;
u32 g = ((val>> 8) & 0xF) * 17;
u32 b = ((val>> 4) & 0xF) * 17;
u32 a = ((val>> 0) & 0xF) * 17;
u32 r = ((val>> 0) & 0xF) * 17;
u32 g = ((val>> 4) & 0xF) * 17;
u32 b = ((val>> 8) & 0xF) * 17;
u32 a = ((val>>12) & 0xF) * 17;
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
}
}
@ -75,9 +75,9 @@ namespace {
for(int y = l; y < u; ++y) {
for(int x = 0; x < width; ++x) {
u32 val = data[y*width + x];
u32 r = Convert5To8((val>>11) & 0x1F);
u32 r = Convert5To8((val ) & 0x1F);
u32 g = Convert6To8((val>> 5) & 0x3F);
u32 b = Convert5To8((val ) & 0x1F);
u32 b = Convert5To8((val>>11) & 0x1F);
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
}
}
@ -88,10 +88,10 @@ namespace {
for(int y = l; y < u; ++y) {
for(int x = 0; x < width; ++x) {
u32 val = data[y*width + x];
u32 r = Convert5To8((val>>11) & 0x1F);
u32 g = Convert5To8((val>> 6) & 0x1F);
u32 b = Convert5To8((val>> 1) & 0x1F);
u32 a = (val & 0x1) * 255;
u32 r = Convert5To8((val>> 0) & 0x1F);
u32 g = Convert5To8((val>> 5) & 0x1F);
u32 b = Convert5To8((val>>10) & 0x1F);
u32 a = ((val >> 15) & 0x1) * 255;
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
}
}

View File

@ -1092,6 +1092,8 @@ void TransformDrawEngineDX9::DoFlush() {
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;
goto rotateVBO;
}
@ -1167,6 +1169,7 @@ void TransformDrawEngineDX9::DoFlush() {
gpuStats.numCachedDrawCalls++;
useElements = vai->ebo ? true : false;
gpuStats.numCachedVertsDrawn += vai->numVerts;
gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
}
vb_ = vai->vbo;
ib_ = vai->ebo;
@ -1192,6 +1195,8 @@ void TransformDrawEngineDX9::DoFlush() {
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);
gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
break;
}
@ -1221,6 +1226,12 @@ rotateVBO:
}
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), dec_->VertexType());
@ -1246,6 +1257,13 @@ rotateVBO:
}
} else {
DecodeVerts();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
prim = indexGen.Prim();
// Undo the strip optimization, not supported by the SW code yet.
@ -1264,6 +1282,7 @@ rotateVBO:
numDrawCalls = 0;
vertexCountInDrawCalls = 0;
prevPrim_ = GE_PRIM_INVALID;
gstate_c.vertexFullAlpha = true;
host->GPUNotifyDraw();
}

View File

@ -44,6 +44,10 @@ class FramebufferManagerDX9;
// DRAWN_ONCE -> death
// DRAWN_RELIABLE -> death
// Bit flags kept in VertexArrayInfoDX9::flags.
enum {
// Remembers gstate_c.vertexFullAlpha for a cached vertex array, so DoFlush()
// can restore it when the cached data is reused instead of re-decoded.
VAI_FLAG_VERTEXFULLALPHA = 1,
};
// Don't bother storing information about draws smaller than this.
enum {
@ -64,8 +68,10 @@ public:
lastFrame = gpuStats.numFlips;
numVerts = 0;
drawsUntilNextFullHash = 0;
flags = 0;
}
~VertexArrayInfoDX9();
enum Status {
VAI_NEW,
VAI_HASHING,
@ -80,7 +86,6 @@ public:
LPDIRECT3DVERTEXBUFFER9 vbo;
LPDIRECT3DINDEXBUFFER9 ebo;
// Precalculated parameter for drawRangeElements
u16 numVerts;
u16 maxIndex;
@ -92,9 +97,9 @@ public:
int numFrames;
int lastFrame; // So that we can forget.
u16 drawsUntilNextFullHash;
u8 flags;
};
// Handles transform, lighting and drawing.
class TransformDrawEngineDX9 {
public:

View File

@ -231,6 +231,7 @@ void VertexDecoderDX9::Step_Color565() const
c[1] = Convert6To8((cdata>>5) & 0x3f);
c[2] = Convert5To8((cdata>>11) & 0x1f);
c[3] = 255;
// Always full alpha.
}
void VertexDecoderDX9::Step_Color5551() const
@ -241,6 +242,7 @@ void VertexDecoderDX9::Step_Color5551() const
c[1] = Convert5To8((cdata>>5) & 0x1f);
c[2] = Convert5To8((cdata>>10) & 0x1f);
c[3] = (cdata >> 15) ? 255 : 0;
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
}
void VertexDecoderDX9::Step_Color4444() const
@ -251,6 +253,7 @@ void VertexDecoderDX9::Step_Color4444() const
c[1] = Convert4To8((cdata >> (4)) & 0xF);
c[2] = Convert4To8((cdata >> (8)) & 0xF);
c[3] = Convert4To8((cdata >> (12)) & 0xF);
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}
void VertexDecoderDX9::Step_Color8888() const
@ -261,6 +264,7 @@ void VertexDecoderDX9::Step_Color8888() const
c[1] = cdata[1];
c[2] = cdata[2];
c[3] = cdata[3];
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}
void VertexDecoderDX9::Step_Color565Morph() const
@ -270,16 +274,16 @@ void VertexDecoderDX9::Step_Color565Morph() const
{
float w = gstate_c.morphWeights[n];
u16 cdata = (u16)(*(u16_le*)(ptr_ + onesize_*n + coloff));
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
col[1] += w * ((cdata>>5) & 0x3f) * (255.0f / 63.0f);
col[2] += w * ((cdata>>11) & 0x1f) * (255.0f / 31.0f);
}
u8 *c = decoded_ + decFmt.c0off;
c[0] = (u8)col[0];
c[1] = (u8)col[1];
c[2] = (u8)col[2];
for (int i = 0; i < 3; i++) {
c[i] = clamp_u8((int)col[i]);
}
c[3] = 255;
// Always full alpha.
}
void VertexDecoderDX9::Step_Color5551Morph() const
@ -295,10 +299,10 @@ void VertexDecoderDX9::Step_Color5551Morph() const
col[3] += w * ((cdata>>15) ? 255.0f : 0.0f);
}
u8 *c = decoded_ + decFmt.c0off;
c[0] = (u8)col[0];
c[1] = (u8)col[1];
c[2] = (u8)col[2];
c[3] = (u8)col[3];
for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]);
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}
void VertexDecoderDX9::Step_Color4444Morph() const
@ -312,10 +316,10 @@ void VertexDecoderDX9::Step_Color4444Morph() const
col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f);
}
u8 *c = decoded_ + decFmt.c0off;
c[0] = (u8)col[0];
c[1] = (u8)col[1];
c[2] = (u8)col[2];
c[3] = (u8)col[3];
for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]);
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}
void VertexDecoderDX9::Step_Color8888Morph() const
@ -329,10 +333,10 @@ void VertexDecoderDX9::Step_Color8888Morph() const
col[j] += w * cdata[j];
}
u8 *c = decoded_ + decFmt.c0off;
c[0] = (u8)col[0];
c[1] = (u8)col[1];
c[2] = (u8)col[2];
c[3] = (u8)col[3];
for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]);
}
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
}
void VertexDecoderDX9::Step_NormalS8() const

View File

@ -90,10 +90,10 @@ bool IsAlphaTestTriviallyTrue() {
#endif
return (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) || (
(!gstate.isStencilTestEnabled() &&
!gstate.isDepthTestEnabled() &&
!gstate.isDepthTestEnabled() &&
gstate.getAlphaTestRef() == 0 &&
gstate.isAlphaBlendEnabled() &&
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
safeDestFactors[(int)gstate.getBlendFuncB()]));
}

View File

@ -112,6 +112,7 @@ void TextureCache::Clear(bool delete_them) {
cache.clear();
secondCache.clear();
}
fbTexInfo_.clear();
}
void TextureCache::DeleteTexture(TexCache::iterator it) {
@ -890,7 +891,7 @@ void TextureCache::LoadClut() {
u32 clutAddr = gstate.getClutAddress();
if (Memory::IsValidAddress(clutAddr)) {
#ifdef _M_SSE
int numBlocks = gstate.getClutLoadBlocks();
int numBlocks = gstate.getClutLoadBlocks();
clutTotalBytes_ = numBlocks * 32;
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
@ -936,7 +937,7 @@ void TextureCache::UpdateCurrentClut() {
clutAlphaLinear_ = false;
clutAlphaLinearColor_ = 0;
if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) {
const u16 *clut = GetCurrentClut<u16>();
const u16_le *clut = GetCurrentClut<u16_le>();
clutAlphaLinear_ = true;
clutAlphaLinearColor_ = clut[15] & 0xFFF0;
for (int i = 0; i < 16; ++i) {
@ -1725,8 +1726,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
finalBuf = tmpTexBuf32.data();
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
}
}
else {
} else {
tmpTexBuf32.resize(std::max(bufw, w) * h);
finalBuf = UnswizzleFromMem(texptr, bufw, 4, level);
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);