mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-03-03 03:27:19 +00:00
Merge pull request #6864 from unknownbrackets/d3d9
Update Direct3D 9 texture cache
This commit is contained in:
commit
7d6377295a
@ -498,6 +498,8 @@ namespace DX9 {
|
||||
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED);
|
||||
|
||||
vfb->last_frame_render = gpuStats.numFlips;
|
||||
vfb->last_frame_used = 0;
|
||||
vfb->last_frame_attached = 0;
|
||||
frameLastFramebufUsed = gpuStats.numFlips;
|
||||
vfbs_.push_back(vfb);
|
||||
ClearBuffer();
|
||||
@ -1104,8 +1106,8 @@ namespace DX9 {
|
||||
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
|
||||
if (SUCCEEDED(hr)) {
|
||||
// TODO: Handle the other formats? We don't currently create them, I think.
|
||||
buffer.Allocate(locked.Pitch / 4, vfb->renderHeight, GPU_DBG_FORMAT_8888_BGRA, false);
|
||||
memcpy(buffer.GetData(), locked.pBits, locked.Pitch * vfb->renderHeight);
|
||||
buffer.Allocate(locked.Pitch / 4, desc.Height, GPU_DBG_FORMAT_8888_BGRA, false);
|
||||
memcpy(buffer.GetData(), locked.pBits, locked.Pitch * desc.Height);
|
||||
offscreen->UnlockRect();
|
||||
success = true;
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ enum {
|
||||
|
||||
struct VirtualFramebufferDX9 {
|
||||
int last_frame_used;
|
||||
int last_frame_attached;
|
||||
int last_frame_render;
|
||||
bool memoryUpdated;
|
||||
|
||||
|
@ -388,6 +388,8 @@ DIRECTX9_GPU::DIRECTX9_GPU()
|
||||
transformDraw_.SetFramebufferManager(&framebufferManager_);
|
||||
framebufferManager_.SetTextureCache(&textureCache_);
|
||||
framebufferManager_.SetShaderManager(shaderManager_);
|
||||
textureCache_.SetFramebufferManager(&framebufferManager_);
|
||||
textureCache_.SetShaderManager(shaderManager_);
|
||||
|
||||
// Sanity check gstate
|
||||
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {
|
||||
@ -1582,6 +1584,7 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
|
||||
|
||||
LPDIRECT3DBASETEXTURE9 baseTex;
|
||||
LPDIRECT3DTEXTURE9 tex;
|
||||
LPDIRECT3DSURFACE9 offscreen = nullptr;
|
||||
HRESULT hr;
|
||||
|
||||
bool success;
|
||||
@ -1594,6 +1597,22 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
|
||||
tex->GetLevelDesc(level, &desc);
|
||||
RECT rect = {0, 0, desc.Width, desc.Height};
|
||||
hr = tex->LockRect(level, &locked, &rect, D3DLOCK_READONLY);
|
||||
|
||||
// If it fails, this means it's a render-to-texture, so we have to get creative.
|
||||
if (FAILED(hr)) {
|
||||
LPDIRECT3DSURFACE9 renderTarget;
|
||||
hr = tex->GetSurfaceLevel(level, &renderTarget);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = pD3Ddevice->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (SUCCEEDED(hr)) {
|
||||
GPUDebugBufferFormat fmt;
|
||||
int pixelSize;
|
||||
@ -1626,7 +1645,12 @@ bool DIRECTX9_GPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
tex->UnlockRect(level);
|
||||
if (offscreen) {
|
||||
offscreen->UnlockRect();
|
||||
offscreen->Release();
|
||||
} else {
|
||||
tex->UnlockRect(level);
|
||||
}
|
||||
}
|
||||
tex->Release();
|
||||
}
|
||||
|
@ -43,12 +43,8 @@ static const bool safeDestFactors[16] = {
|
||||
true, //GE_DSTBLEND_FIXB,
|
||||
};
|
||||
|
||||
static bool IsAlphaTestTriviallyTrue() {
|
||||
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
|
||||
int alphaTestRef = gstate.getAlphaTestRef();
|
||||
int alphaTestMask = gstate.getAlphaTestMask();
|
||||
|
||||
switch (alphaTestFunc) {
|
||||
bool IsAlphaTestTriviallyTrue() {
|
||||
switch (gstate.getAlphaTestFunction()) {
|
||||
case GE_COMP_NEVER:
|
||||
return false;
|
||||
|
||||
@ -56,36 +52,60 @@ static bool IsAlphaTestTriviallyTrue() {
|
||||
return true;
|
||||
|
||||
case GE_COMP_GEQUAL:
|
||||
return alphaTestRef == 0;
|
||||
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
|
||||
return true; // If alpha is full, it doesn't matter what the ref value is.
|
||||
return gstate.getAlphaTestRef() == 0;
|
||||
|
||||
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
|
||||
// Speeds up Lumines by a LOT on PowerVR.
|
||||
case GE_COMP_NOTEQUAL:
|
||||
if (gstate.getAlphaTestRef() == 255) {
|
||||
// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
|
||||
// complicated code to discard the draw or whatnot.
|
||||
return false;
|
||||
}
|
||||
// Fallthrough on purpose
|
||||
|
||||
case GE_COMP_GREATER:
|
||||
{
|
||||
bool depthTest = gstate.isDepthTestEnabled();
|
||||
#if 0
|
||||
// Easy way to check the values in the debugger without ruining && early-out
|
||||
bool doTextureAlpha = gstate.isTextureAlphaUsed();
|
||||
bool stencilTest = gstate.isStencilTestEnabled();
|
||||
GEBlendSrcFactor src = gstate.getBlendFuncA();
|
||||
GEBlendDstFactor dst = gstate.getBlendFuncB();
|
||||
if (!stencilTest && !depthTest && alphaTestRef == 0 && gstate.isAlphaBlendEnabled() && src == GE_SRCBLEND_SRCALPHA && safeDestFactors[(int)dst])
|
||||
return true;
|
||||
return false;
|
||||
bool depthTest = gstate.isDepthTestEnabled();
|
||||
GEComparison depthTestFunc = gstate.getDepthTestFunction();
|
||||
int alphaRef = gstate.getAlphaTestRef();
|
||||
int blendA = gstate.getBlendFuncA();
|
||||
bool blendEnabled = gstate.isAlphaBlendEnabled();
|
||||
int blendB = gstate.getBlendFuncA();
|
||||
#endif
|
||||
return (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) || (
|
||||
(!gstate.isStencilTestEnabled() &&
|
||||
!gstate.isDepthTestEnabled() &&
|
||||
gstate.getAlphaTestRef() == 0 &&
|
||||
gstate.isAlphaBlendEnabled() &&
|
||||
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
|
||||
safeDestFactors[(int)gstate.getBlendFuncB()]));
|
||||
}
|
||||
|
||||
case GE_COMP_LEQUAL:
|
||||
return alphaTestRef == 255;
|
||||
return gstate.getAlphaTestRef() == 255;
|
||||
|
||||
case GE_COMP_EQUAL:
|
||||
case GE_COMP_LESS:
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool IsColorTestTriviallyTrue() {
|
||||
GEComparison colorTestFunc = gstate.getColorTestFunction();
|
||||
switch (colorTestFunc) {
|
||||
bool IsAlphaTestAgainstZero() {
|
||||
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
|
||||
}
|
||||
|
||||
bool IsColorTestTriviallyTrue() {
|
||||
switch (gstate.getColorTestFunction()) {
|
||||
case GE_COMP_NEVER:
|
||||
return false;
|
||||
|
||||
|
@ -24,15 +24,12 @@
|
||||
|
||||
namespace DX9 {
|
||||
|
||||
struct FragmentShaderIDDX9
|
||||
{
|
||||
FragmentShaderIDDX9() {d[0] = 0xFFFFFFFF;}
|
||||
void clear() {d[0] = 0xFFFFFFFF;}
|
||||
u32 d[1];
|
||||
bool operator < (const FragmentShaderIDDX9 &other) const
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
|
||||
{
|
||||
struct FragmentShaderIDDX9 {
|
||||
FragmentShaderIDDX9() {clear();}
|
||||
void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;}
|
||||
u32 d[2];
|
||||
bool operator < (const FragmentShaderIDDX9 &other) const {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
|
||||
if (d[i] < other.d[i])
|
||||
return true;
|
||||
if (d[i] > other.d[i])
|
||||
@ -40,10 +37,8 @@ struct FragmentShaderIDDX9
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool operator == (const FragmentShaderIDDX9 &other) const
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
|
||||
{
|
||||
bool operator == (const FragmentShaderIDDX9 &other) const {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
|
||||
if (d[i] != other.d[i])
|
||||
return false;
|
||||
}
|
||||
@ -51,9 +46,11 @@ struct FragmentShaderIDDX9
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id);
|
||||
|
||||
void GenerateFragmentShaderDX9(char *buffer);
|
||||
|
||||
bool IsAlphaTestAgainstZero();
|
||||
bool IsAlphaTestTriviallyTrue();
|
||||
bool IsColorTestTriviallyTrue();
|
||||
|
||||
};
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
|
||||
#include "GPU/Directx9/TextureCacheDX9.h"
|
||||
#include "GPU/Directx9/FramebufferDX9.h"
|
||||
#include "GPU/Directx9/helper/dx_state.h"
|
||||
@ -49,15 +50,29 @@ namespace DX9 {
|
||||
// Try to be prime to other decimation intervals.
|
||||
#define TEXCACHE_DECIMATION_INTERVAL 13
|
||||
|
||||
TextureCacheDX9::TextureCacheDX9() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
|
||||
// Changes more frequent than this will be considered "frequent" and prevent texture scaling.
|
||||
#define TEXCACHE_FRAME_CHANGE_FREQUENT 6
|
||||
|
||||
#define TEXCACHE_MAX_TEXELS_SCALED (256*256) // Per frame
|
||||
|
||||
// Sets up an empty texture cache: resets the per-frame counters, sizes the
// temporary decode buffers and allocates zeroed, 16-byte aligned CLUT buffers.
TextureCacheDX9::TextureCacheDX9() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) {
	// Element count shared by all three temporary decode buffers.
	const int kTempBufElements = 1024 * 512;
	// Byte size of each CLUT buffer (4096 u32 entries).
	const size_t kClutBufBytes = 4096 * sizeof(u32);

	timesInvalidatedAllThisFrame_ = 0;
	lastBoundTexture = INVALID_TEX;
	decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;

	// This is 5MB of temporary storage. Might be possible to shrink it.
	tmpTexBuf32.resize(kTempBufElements);  // 2MB
	tmpTexBuf16.resize(kTempBufElements);  // 1MB
	tmpTexBufRearrange.resize(kTempBufElements);  // 2MB

	// Aren't these way too big?
	clutBufConverted_ = (u32 *)AllocateAlignedMemory(kClutBufBytes, 16);  // 16KB
	clutBufRaw_ = (u32 *)AllocateAlignedMemory(kClutBufBytes, 16);  // 16KB

	// Clear both buffers so that reads from parts of the CLUT that were never
	// loaded look the same in release and debug builds.
	memset(clutBufConverted_, 0, kClutBufBytes);
	memset(clutBufRaw_, 0, kClutBufBytes);

	maxAnisotropyLevel = 16;
	SetupTextureDecoder();
}
|
||||
@ -85,6 +100,21 @@ void TextureCacheDX9::Clear(bool delete_them) {
|
||||
cache.clear();
|
||||
secondCache.clear();
|
||||
}
|
||||
fbTexInfo_.clear();
|
||||
}
|
||||
|
||||
// Releases the texture held by the given cache entry, drops any framebuffer
// attachment info recorded for its address, and removes the entry from the cache.
// The iterator must not be used by the caller after this returns.
void TextureCacheDX9::DeleteTexture(TexCache::iterator it) {
	auto &doomed = it->second;
	doomed.ReleaseTexture();

	// Erasing by key does nothing when no info was recorded for this address.
	fbTexInfo_.erase(doomed.addr);

	cache.erase(it);
}
|
||||
|
||||
// Resets the "last bound texture" marker to INVALID_TEX and flags the texture
// state in gstate_c as changed (TEXCHANGE_PARAMSONLY).
void TextureCacheDX9::ForgetLastTexture() {
	gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
	lastBoundTexture = INVALID_TEX;
}
|
||||
|
||||
// Removes old textures.
|
||||
@ -100,8 +130,7 @@ void TextureCacheDX9::Decimate() {
|
||||
int killAge = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
|
||||
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) {
|
||||
if (iter->second.lastFrame + killAge < gpuStats.numFlips) {
|
||||
iter->second.ReleaseTexture();
|
||||
cache.erase(iter++);
|
||||
DeleteTexture(iter++);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
@ -131,16 +160,19 @@ void TextureCacheDX9::Invalidate(u32 addr, int size, GPUInvalidationType type) {
|
||||
|
||||
// They could invalidate inside the texture, let's just give a bit of leeway.
|
||||
const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
|
||||
u64 startKey = addr - LARGEST_TEXTURE_SIZE;
|
||||
u64 endKey = addr + size + LARGEST_TEXTURE_SIZE;
|
||||
const u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32;
|
||||
u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32;
|
||||
if (endKey < startKey) {
|
||||
endKey = (u64)-1;
|
||||
}
|
||||
|
||||
for (TexCache::iterator iter = cache.lower_bound(startKey), end = cache.upper_bound(endKey); iter != end; ++iter) {
|
||||
u32 texAddr = iter->second.addr;
|
||||
u32 texEnd = iter->second.addr + iter->second.sizeInRAM;
|
||||
|
||||
if (texAddr < addr_end && addr < texEnd) {
|
||||
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
|
||||
// Clear status -> STATUS_HASHING.
|
||||
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
|
||||
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
|
||||
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
|
||||
}
|
||||
if (type != GPU_INVALIDATE_ALL) {
|
||||
gpuStats.numTextureInvalidations++;
|
||||
@ -160,10 +192,14 @@ void TextureCacheDX9::InvalidateAll(GPUInvalidationType /*unused*/) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (timesInvalidatedAllThisFrame_ > 5) {
|
||||
return;
|
||||
}
|
||||
timesInvalidatedAllThisFrame_++;
|
||||
|
||||
for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) {
|
||||
if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
|
||||
// Clear status -> STATUS_HASHING.
|
||||
iter->second.status &= ~TexCacheEntry::STATUS_MASK;
|
||||
if (iter->second.GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
|
||||
iter->second.SetHashStatus(TexCacheEntry::STATUS_HASHING);
|
||||
}
|
||||
if (!iter->second.framebuffer) {
|
||||
iter->second.invalidHint++;
|
||||
@ -176,68 +212,144 @@ void TextureCacheDX9::ClearNextFrame() {
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline void AttachFramebufferValid(T &entry, VirtualFramebufferDX9 *framebuffer) {
|
||||
void TextureCacheDX9::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo) {
|
||||
const bool hasInvalidFramebuffer = entry->framebuffer == 0 || entry->invalidHint == -1;
|
||||
const bool hasOlderFramebuffer = entry->framebuffer != 0 && entry->framebuffer->last_frame_render < framebuffer->last_frame_render;
|
||||
if (hasInvalidFramebuffer || hasOlderFramebuffer) {
|
||||
bool hasFartherFramebuffer = false;
|
||||
if (!hasInvalidFramebuffer && !hasOlderFramebuffer) {
|
||||
// If it's valid, but the offset is greater, then we still win.
|
||||
if (fbTexInfo_[entry->addr].yOffset == fbInfo.yOffset)
|
||||
hasFartherFramebuffer = fbTexInfo_[entry->addr].xOffset > fbInfo.xOffset;
|
||||
else
|
||||
hasFartherFramebuffer = fbTexInfo_[entry->addr].yOffset > fbInfo.yOffset;
|
||||
}
|
||||
if (hasInvalidFramebuffer || hasOlderFramebuffer || hasFartherFramebuffer) {
|
||||
entry->framebuffer = framebuffer;
|
||||
entry->invalidHint = 0;
|
||||
entry->status &= ~TextureCacheDX9::TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
fbTexInfo_[entry->addr] = fbInfo;
|
||||
framebuffer->last_frame_attached = gpuStats.numFlips;
|
||||
host->GPUNotifyTextureAttachment(entry->addr);
|
||||
} else if (entry->framebuffer == framebuffer) {
|
||||
framebuffer->last_frame_attached = gpuStats.numFlips;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void AttachFramebufferInvalid(T &entry, VirtualFramebufferDX9 *framebuffer) {
|
||||
void TextureCacheDX9::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo) {
|
||||
if (entry->framebuffer == 0 || entry->framebuffer == framebuffer) {
|
||||
entry->framebuffer = framebuffer;
|
||||
entry->invalidHint = -1;
|
||||
entry->status &= ~TextureCacheDX9::TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
fbTexInfo_[entry->addr] = fbInfo;
|
||||
host->GPUNotifyTextureAttachment(entry->addr);
|
||||
}
|
||||
}
|
||||
|
||||
inline void TextureCacheDX9::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, bool exactMatch) {
|
||||
bool TextureCacheDX9::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, u32 texaddrOffset) {
|
||||
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
|
||||
|
||||
AttachedFramebufferInfo fbInfo = {0};
|
||||
|
||||
const u64 mirrorMask = 0x00600000;
|
||||
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
|
||||
const u32 addr = (address | 0x04000000) & 0x3FFFFFFF & ~mirrorMask;
|
||||
const u32 texaddr = ((entry->addr + texaddrOffset) & ~mirrorMask);
|
||||
const bool noOffset = texaddr == addr;
|
||||
const bool exactMatch = noOffset && entry->format < 4;
|
||||
const u32 h = 1 << ((entry->dim >> 8) & 0xf);
|
||||
// 512 on a 272 framebuffer is sane, so let's be lenient.
|
||||
const u32 minSubareaHeight = h / 4;
|
||||
|
||||
// If they match exactly, it's non-CLUT and from the top left.
|
||||
if (exactMatch) {
|
||||
// Apply to non-buffered and buffered mode only.
|
||||
if (!(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE))
|
||||
return;
|
||||
return false;
|
||||
|
||||
DEBUG_LOG(G3D, "Render to texture detected at %08x!", address);
|
||||
if (!entry->framebuffer || entry->invalidHint == -1) {
|
||||
if (entry->format != framebuffer->format) {
|
||||
WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format);
|
||||
// If it already has one, let's hope that one is correct.
|
||||
AttachFramebufferInvalid(entry, framebuffer);
|
||||
} else {
|
||||
AttachFramebufferValid(entry, framebuffer);
|
||||
if (framebuffer->fb_stride != entry->bufw) {
|
||||
WARN_LOG_REPORT_ONCE(diffStrides1, G3D, "Render to texture with different strides %d != %d", entry->bufw, framebuffer->fb_stride);
|
||||
}
|
||||
if (entry->format != framebuffer->format) {
|
||||
WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format);
|
||||
// Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory.
|
||||
// However, some games use a different format to clear the buffer.
|
||||
if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) {
|
||||
DetachFramebuffer(entry, address, framebuffer);
|
||||
}
|
||||
// TODO: Delete the original non-fbo texture too.
|
||||
} else {
|
||||
AttachFramebufferValid(entry, framebuffer, fbInfo);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// Apply to buffered mode only.
|
||||
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE))
|
||||
return;
|
||||
return false;
|
||||
|
||||
// 3rd Birthday (and possibly other games) render to a 16 bit clut texture.
|
||||
const bool compatFormat = framebuffer->format == entry->format
|
||||
|| (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32)
|
||||
|| (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
|
||||
const bool clutFormat =
|
||||
(framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) ||
|
||||
(framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
|
||||
|
||||
// Is it at least the right stride?
|
||||
if (framebuffer->fb_stride == entry->bufw && compatFormat) {
|
||||
if (framebuffer->format != entry->format) {
|
||||
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
|
||||
// TODO: Use an FBO to translate the palette?
|
||||
AttachFramebufferValid(entry, framebuffer);
|
||||
} else if ((entry->addr - address) / entry->bufw < framebuffer->height) {
|
||||
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address);
|
||||
// TODO: Keep track of the y offset.
|
||||
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
|
||||
AttachFramebufferInvalid(entry, framebuffer);
|
||||
const u32 bitOffset = (texaddr - addr) * 8;
|
||||
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]);
|
||||
fbInfo.yOffset = pixelOffset / entry->bufw;
|
||||
fbInfo.xOffset = pixelOffset % entry->bufw;
|
||||
|
||||
if (framebuffer->fb_stride != entry->bufw) {
|
||||
if (noOffset) {
|
||||
WARN_LOG_REPORT_ONCE(diffStrides2, G3D, "Render to texture using CLUT with different strides %d != %d", entry->bufw, framebuffer->fb_stride);
|
||||
} else {
|
||||
// Assume any render-to-tex with different bufw + offset is a render from ram.
|
||||
DetachFramebuffer(entry, address, framebuffer);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
|
||||
// Can't be inside the framebuffer then, ram. Detach to be safe.
|
||||
DetachFramebuffer(entry, address, framebuffer);
|
||||
return false;
|
||||
}
|
||||
// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
|
||||
// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
|
||||
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
|
||||
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible render to texture at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
|
||||
DetachFramebuffer(entry, address, framebuffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
|
||||
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
|
||||
if (clutFormat) {
|
||||
if (!noOffset) {
|
||||
WARN_LOG_REPORT_ONCE(subareaClut, G3D, "Render to texture using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
|
||||
}
|
||||
AttachFramebufferValid(entry, framebuffer, fbInfo);
|
||||
entry->status |= TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
// We'll validate it compiles later.
|
||||
return true;
|
||||
} else if (entry->format == GE_TFMT_CLUT8 || entry->format == GE_TFMT_CLUT4) {
|
||||
ERROR_LOG_REPORT_ONCE(fourEightBit, G3D, "4 and 8-bit CLUT format not supported for framebuffers");
|
||||
}
|
||||
|
||||
// This is either normal or we failed to generate a shader to depalettize
|
||||
if (framebuffer->format == entry->format || clutFormat) {
|
||||
if (framebuffer->format != entry->format) {
|
||||
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
|
||||
AttachFramebufferValid(entry, framebuffer, fbInfo);
|
||||
return true;
|
||||
} else {
|
||||
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
|
||||
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
|
||||
AttachFramebufferInvalid(entry, framebuffer, fbInfo);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with incompatible formats %d != %d at %08x", entry->format, framebuffer->format, address);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer) {
|
||||
@ -248,14 +360,18 @@ inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address
|
||||
}
|
||||
|
||||
void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *framebuffer, FramebufferNotification msg) {
|
||||
// This is a rough heuristic, because sometimes our framebuffers are too tall.
|
||||
static const u32 MAX_SUBAREA_Y_OFFSET = 32;
|
||||
|
||||
// Must be in VRAM so | 0x04000000 it is.
|
||||
const u64 cacheKey = (u64)(address | 0x04000000) << 32;
|
||||
// Must be in VRAM so | 0x04000000 it is. Also, ignore memory mirrors.
|
||||
// These checks are mainly to reduce scanning all textures.
|
||||
const u32 addr = (address | 0x04000000) & 0x3F9FFFFF;
|
||||
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u64 cacheKey = (u64)addr << 32;
|
||||
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
|
||||
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
|
||||
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);
|
||||
const u64 cacheKeyEnd = cacheKey + ((u64)(framebuffer->fb_stride * framebuffer->height * bpp) << 32);
|
||||
|
||||
// The first mirror starts at 0x04200000 and there are 3. We search all for framebuffers.
|
||||
const u64 mirrorCacheKey = (u64)0x04200000 << 32;
|
||||
const u64 mirrorCacheKeyEnd = (u64)0x04800000 << 32;
|
||||
|
||||
switch (msg) {
|
||||
case NOTIFY_FB_CREATED:
|
||||
@ -265,14 +381,21 @@ void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *fram
|
||||
fbCache_.push_back(framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, address | 0x04000000, framebuffer, it->first == cacheKey);
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
// Let's assume anything in mirrors is fair game to check.
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
AttachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
|
||||
case NOTIFY_FB_DESTROYED:
|
||||
fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());
|
||||
for (auto it = cache.lower_bound(cacheKey), end = cache.upper_bound(cacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, address | 0x04000000, framebuffer);
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
for (auto it = cache.lower_bound(mirrorCacheKey), end = cache.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
|
||||
DetachFramebuffer(&it->second, addr, framebuffer);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -480,40 +603,80 @@ static const u8 MagFilt[2] = {
|
||||
D3DTEXF_LINEAR
|
||||
};
|
||||
|
||||
void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
|
||||
int minFilt = gstate.texfilter & 0x7;
|
||||
int magFilt = (gstate.texfilter>>8) & 1;
|
||||
bool sClamp = gstate.isTexCoordClampedS();
|
||||
bool tClamp = gstate.isTexCoordClampedT();
|
||||
|
||||
// Always force !!
|
||||
force = true;
|
||||
void TextureCacheDX9::GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, int maxLevel) {
|
||||
minFilt = gstate.texfilter & 0x7;
|
||||
magFilt = (gstate.texfilter>>8) & 1;
|
||||
sClamp = gstate.isTexCoordClampedS();
|
||||
tClamp = gstate.isTexCoordClampedT();
|
||||
|
||||
bool noMip = (gstate.texlevel & 0xFFFFFF) == 0x000001 || (gstate.texlevel & 0xFFFFFF) == 0x100001 ; // Fix texlevel at 0
|
||||
|
||||
float lodBias = 0.0;
|
||||
if (entry.maxLevel == 0) {
|
||||
if (maxLevel == 0) {
|
||||
// Enforce no mip filtering, for safety.
|
||||
minFilt &= 1; // no mipmaps yet
|
||||
lodBias = 0.0f;
|
||||
} else {
|
||||
// Texture lod bias should be signed.
|
||||
lodBias = (float)(int)(s8)((gstate.texlevel >> 16) & 0xFF) / 16.0f;
|
||||
}
|
||||
|
||||
if ((g_Config.iTexFiltering == LINEAR || (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos)) && !gstate.isColorTestEnabled()) {
|
||||
if (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos > 0 && (gstate.getTextureDimension(0) & 0xF) >= 9) {
|
||||
magFilt |= 1;
|
||||
minFilt |= 1;
|
||||
}
|
||||
|
||||
if (g_Config.iTexFiltering == NEAREST) {
|
||||
if (g_Config.iTexFiltering == LINEAR && (!gstate.isColorTestEnabled() || IsColorTestTriviallyTrue())) {
|
||||
// TODO: IsAlphaTestTriviallyTrue() is unsafe here. vertexFullAlpha is not calculated yet.
|
||||
if (!gstate.isAlphaTestEnabled() || IsAlphaTestTriviallyTrue()) {
|
||||
magFilt |= 1;
|
||||
minFilt |= 1;
|
||||
}
|
||||
}
|
||||
bool forceNearest = g_Config.iTexFiltering == NEAREST;
|
||||
// Force Nearest when color test enabled and rendering resolution greater than 480x272
|
||||
if ((gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue()) && g_Config.iInternalResolution != 1 && gstate.isModeThrough()) {
|
||||
// Some games use 0 as the color test color, which won't be too bad if it bleeds.
|
||||
// Fuchsia and green, etc. are the problem colors.
|
||||
if (gstate.getColorTestRef() != 0) {
|
||||
forceNearest = true;
|
||||
}
|
||||
}
|
||||
if (forceNearest) {
|
||||
magFilt &= ~1;
|
||||
minFilt &= ~1;
|
||||
}
|
||||
|
||||
if (!g_Config.bMipMap || noMip) {
|
||||
magFilt &= 1;
|
||||
minFilt &= 1;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
|
||||
int minFilt;
|
||||
int magFilt;
|
||||
bool sClamp;
|
||||
bool tClamp;
|
||||
float lodBias;
|
||||
GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, entry.maxLevel);
|
||||
|
||||
if (entry.maxLevel != 0) {
|
||||
if (force || entry.lodBias != lodBias) {
|
||||
#ifndef USING_GLES2
|
||||
GETexLevelMode mode = gstate.getTexLevelMode();
|
||||
switch (mode) {
|
||||
case GE_TEXLEVEL_MODE_AUTO:
|
||||
// TODO
|
||||
break;
|
||||
case GE_TEXLEVEL_MODE_CONST:
|
||||
// TODO
|
||||
break;
|
||||
case GE_TEXLEVEL_MODE_SLOPE:
|
||||
// TODO
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
entry.lodBias = lodBias;
|
||||
}
|
||||
}
|
||||
|
||||
dxstate.texMinFilter.set(MinFilt[minFilt]);
|
||||
dxstate.texMipFilter.set(MipFilt[minFilt]);
|
||||
@ -523,8 +686,38 @@ void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
|
||||
dxstate.texAddressV.set(tClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
|
||||
}
|
||||
|
||||
// Applies sampler state for a texture that is backed by a framebuffer.
//
// Filters are always set (computed with maxLevel 0). The U/V address modes are
// only set when the framebuffer dimensions equal the current texture's level-0
// dimensions; otherwise they are left untouched, since wrap/clamp is expected
// to be handled in the shader for mismatched sizes.
void TextureCacheDX9::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
	int minFilt;
	int magFilt;
	bool sClamp;
	bool tClamp;
	float lodBias;
	GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, 0);

	dxstate.texMinFilter.set(MinFilt[minFilt]);
	dxstate.texMipFilter.set(MipFilt[minFilt]);
	dxstate.texMagFilter.set(MagFilt[magFilt]);

	// Often the framebuffer will not match the texture size.
	const int texWidth = gstate.getTextureWidth(0);
	const int texHeight = gstate.getTextureHeight(0);
	const bool sizesMatch = texWidth == bufferWidth && texHeight == bufferHeight;
	if (sizesMatch) {
		dxstate.texAddressU.set(sClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
		dxstate.texAddressV.set(tClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);
	}
}
|
||||
|
||||
void TextureCacheDX9::StartFrame() {
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
|
||||
if (texelsScaledThisFrame_) {
|
||||
// INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
|
||||
}
|
||||
texelsScaledThisFrame_ = 0;
|
||||
if (clearCacheNextFrame_) {
|
||||
Clear(true);
|
||||
clearCacheNextFrame_ = false;
|
||||
@ -540,7 +733,6 @@ static inline u32 MiniHash(const u32 *ptr) {
|
||||
// Hashes the raw texture data at `addr`.
//
// The hashed size is textureBitsPerPixel[format] * bufw * h / 8 bytes, i.e. it
// is based on the buffer width, not the visible width (`w` is unused).
// NOTE(review): the pointer from Memory::GetPointer is passed on unchecked -
// presumably callers only pass valid addresses; confirm.
static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat format) {
	const u32 sizeInRAM = (textureBitsPerPixel[format] * bufw * h) / 8;
	const u32 *checkp = (const u32 *) Memory::GetPointer(addr);

	return DoQuickTexHash(checkp, sizeInRAM);
}
|
||||
@ -551,14 +743,29 @@ inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int ma
|
||||
|
||||
void TextureCacheDX9::LoadClut() {
|
||||
u32 clutAddr = gstate.getClutAddress();
|
||||
clutTotalBytes_ = gstate.getClutLoadBytes();
|
||||
if (Memory::IsValidAddress(clutAddr)) {
|
||||
#ifdef _M_SSE
|
||||
int numBlocks = gstate.getClutLoadBlocks();
|
||||
clutTotalBytes_ = numBlocks * 32;
|
||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
||||
__m128i data1 = _mm_loadu_si128(source);
|
||||
__m128i data2 = _mm_loadu_si128(source + 1);
|
||||
_mm_store_si128(dest, data1);
|
||||
_mm_store_si128(dest + 1, data2);
|
||||
}
|
||||
#else
|
||||
clutTotalBytes_ = gstate.getClutLoadBytes();
|
||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
|
||||
#endif
|
||||
} else {
|
||||
clutTotalBytes_ = gstate.getClutLoadBytes();
|
||||
memset(clutBufRaw_, 0xFF, clutTotalBytes_);
|
||||
}
|
||||
// Reload the clut next time.
|
||||
clutLastFormat_ = 0xFFFFFFFF;
|
||||
clutMaxBytes_ = std::max(clutMaxBytes_, clutTotalBytes_);
|
||||
}
|
||||
|
||||
void TextureCacheDX9::UpdateCurrentClut() {
|
||||
@ -576,7 +783,7 @@ void TextureCacheDX9::UpdateCurrentClut() {
|
||||
clutAlphaLinear_ = false;
|
||||
clutAlphaLinearColor_ = 0;
|
||||
if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) {
|
||||
const u16_le *clut = (const u16_le*)GetCurrentClut<u16>();
|
||||
const u16_le *clut = GetCurrentClut<u16_le>();
|
||||
clutAlphaLinear_ = true;
|
||||
clutAlphaLinearColor_ = clut[15] & 0xFFF0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
@ -585,6 +792,7 @@ void TextureCacheDX9::UpdateCurrentClut() {
|
||||
break;
|
||||
}
|
||||
// Alpha 0 doesn't matter.
|
||||
// TODO: Well, depending on blend mode etc, it can actually matter, although unlikely.
|
||||
if (i != 0 && (clut[i] >> 12) != clutAlphaLinearColor_) {
|
||||
clutAlphaLinear_ = false;
|
||||
break;
|
||||
@ -604,11 +812,13 @@ inline u32 TextureCacheDX9::GetCurrentClutHash() {
|
||||
return clutHash_;
|
||||
}
|
||||
|
||||
void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry)
|
||||
{
|
||||
entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE;
|
||||
void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer) {
|
||||
_dbg_assert_msg_(G3D, framebuffer != nullptr, "Framebuffer must not be null.");
|
||||
|
||||
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
|
||||
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
|
||||
if (useBufferedRendering) {
|
||||
// TODO: Depal
|
||||
// For now, let's not bind FBOs that we know are off (invalidHint will be -1.)
|
||||
// But let's still not use random memory.
|
||||
if (entry->framebuffer->fbo) {
|
||||
@ -620,20 +830,69 @@ void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry)
|
||||
pD3Ddevice->SetTexture(0, NULL);
|
||||
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
|
||||
}
|
||||
|
||||
gstate_c.textureFullAlpha = framebuffer->format == GE_FORMAT_565;
|
||||
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
|
||||
|
||||
// Keep the framebuffer alive.
|
||||
framebuffer->last_frame_used = gpuStats.numFlips;
|
||||
|
||||
// We need to force it, since we may have set it on a texture before attaching.
|
||||
UpdateSamplingParams(*entry, true);
|
||||
gstate_c.curTextureWidth = entry->framebuffer->width;
|
||||
gstate_c.curTextureHeight = entry->framebuffer->height;
|
||||
gstate_c.curTextureWidth = framebuffer->bufferWidth;
|
||||
gstate_c.curTextureHeight = framebuffer->bufferHeight;
|
||||
gstate_c.flipTexture = false;
|
||||
gstate_c.bgraTexture = false;
|
||||
gstate_c.textureFullAlpha = entry->framebuffer->format == GE_FORMAT_565;
|
||||
gstate_c.curTextureXOffset = fbTexInfo_[entry->addr].xOffset;
|
||||
gstate_c.curTextureYOffset = fbTexInfo_[entry->addr].yOffset;
|
||||
gstate_c.needShaderTexClamp = gstate_c.curTextureWidth != (u32)gstate.getTextureWidth(0) || gstate_c.curTextureHeight != (u32)gstate.getTextureHeight(0);
|
||||
if (gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0) {
|
||||
gstate_c.needShaderTexClamp = true;
|
||||
}
|
||||
SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
|
||||
} else {
|
||||
if (entry->framebuffer->fbo)
|
||||
entry->framebuffer->fbo = 0;
|
||||
if (framebuffer->fbo) {
|
||||
fbo_destroy(framebuffer->fbo);
|
||||
framebuffer->fbo = 0;
|
||||
}
|
||||
pD3Ddevice->SetTexture(0, NULL);
|
||||
gstate_c.needShaderTexClamp = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to bind a framebuffer as the current texture, using the level 0
// texture address plus the given offset (forwarded to AttachFramebuffer as the
// texture address offset).
// Returns true only when rendering is in FB_BUFFERED_MODE, both addresses are
// valid, a cache entry already exists for the texture address, and at least
// one known framebuffer was attached to that entry.
bool TextureCacheDX9::SetOffsetTexture(u32 offset) {
	if (g_Config.iRenderingMode != FB_BUFFERED_MODE)
		return false;

	const u32 baseAddr = gstate.getTextureAddress(0);
	const bool addrsValid = Memory::IsValidAddress(baseAddr) && Memory::IsValidAddress(baseAddr + offset);
	if (!addrsValid)
		return false;

	// The key is built from the address only: the low 32 bits stay zero.
	const u64 key = (u64)(baseAddr & 0x3FFFFFFF) << 32;
	TexCache::iterator found = cache.find(key);
	if (found == cache.end())
		return false;
	TexCacheEntry *entry = &found->second;

	// Offer every known framebuffer; all of them are tried even after a hit.
	bool attached = false;
	const size_t count = fbCache_.size();
	for (size_t idx = 0; idx != count; ++idx) {
		VirtualFramebufferDX9 *candidate = fbCache_[idx];
		attached = AttachFramebuffer(entry, candidate->fb_address, candidate, offset) || attached;
	}

	if (!attached || !entry->framebuffer)
		return false;

	SetTextureFramebuffer(entry, entry->framebuffer);
	lastBoundTexture = INVALID_TEX;
	entry->lastFrame = gpuStats.numFlips;
	return true;
}
|
||||
|
||||
void TextureCacheDX9::SetTexture(bool force) {
|
||||
#ifdef DEBUG_TEXTURES
|
||||
if (SetDebugTexture()) {
|
||||
@ -689,6 +948,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
TexCache::iterator iter = cache.find(cachekey);
|
||||
TexCacheEntry *entry = NULL;
|
||||
gstate_c.flipTexture = false;
|
||||
gstate_c.needShaderTexClamp = false;
|
||||
gstate_c.bgraTexture = true;
|
||||
gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
|
||||
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
|
||||
@ -703,7 +963,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
// Check for FBO - slow!
|
||||
if (entry->framebuffer) {
|
||||
if (match) {
|
||||
SetTextureFramebuffer(entry);
|
||||
SetTextureFramebuffer(entry, entry->framebuffer);
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
entry->lastFrame = gpuStats.numFlips;
|
||||
return;
|
||||
@ -714,9 +974,19 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
}
|
||||
}
|
||||
|
||||
bool rehash = (entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE;
|
||||
bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE;
|
||||
bool doDelete = true;
|
||||
|
||||
// First let's see if another texture with the same address had a hashfail.
|
||||
if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) {
|
||||
// Always rehash in this case, if one changed the rest all probably did.
|
||||
rehash = true;
|
||||
entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK;
|
||||
} else if ((gstate_c.textureChanged & TEXCHANGE_UPDATED) == 0) {
|
||||
// Okay, just some parameter change - the data didn't change, no need to rehash.
|
||||
rehash = false;
|
||||
}
|
||||
|
||||
if (match) {
|
||||
if (entry->lastFrame != gpuStats.numFlips) {
|
||||
u32 diff = gpuStats.numFlips - entry->lastFrame;
|
||||
@ -737,7 +1007,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
}
|
||||
|
||||
// If it's not huge or has been invalidated many times, recheck the whole texture.
|
||||
if (entry->invalidHint > 180 || (entry->invalidHint > 15 && dim <= 0x909)) {
|
||||
if (entry->invalidHint > 180 || (entry->invalidHint > 15 && (dim >> 8) < 9 && (dim & 0xF) < 9)) {
|
||||
entry->invalidHint = 0;
|
||||
rehash = true;
|
||||
}
|
||||
@ -749,21 +1019,25 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
rehash = false;
|
||||
}
|
||||
|
||||
if (rehash && (entry->status & TexCacheEntry::STATUS_MASK) != TexCacheEntry::STATUS_RELIABLE) {
|
||||
if (rehash && entry->GetHashStatus() != TexCacheEntry::STATUS_RELIABLE) {
|
||||
fullhash = QuickTexHash(texaddr, bufw, w, h, format);
|
||||
if (fullhash != entry->fullhash) {
|
||||
hashFail = true;
|
||||
} else if ((entry->status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_UNRELIABLE && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
|
||||
} else if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
|
||||
// Reset to STATUS_HASHING.
|
||||
if (g_Config.bTextureBackoffCache) {
|
||||
entry->status &= ~TexCacheEntry::STATUS_MASK;
|
||||
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
|
||||
}
|
||||
entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (hashFail) {
|
||||
match = false;
|
||||
entry->status |= TexCacheEntry::STATUS_UNRELIABLE;
|
||||
if (entry->numFrames < TEXCACHE_FRAME_CHANGE_FREQUENT) {
|
||||
entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
|
||||
}
|
||||
entry->numFrames = 0;
|
||||
|
||||
// Don't give up just yet. Let's try the secondary cache if it's been invalidated before.
|
||||
@ -783,7 +1057,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
match = true;
|
||||
}
|
||||
} else {
|
||||
secondKey = entry->fullhash | (u64)entry->cluthash << 32;
|
||||
secondKey = entry->fullhash | ((u64)entry->cluthash << 32);
|
||||
secondCache[secondKey] = *entry;
|
||||
doDelete = false;
|
||||
}
|
||||
@ -792,6 +1066,11 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
}
|
||||
}
|
||||
|
||||
if (match && (entry->status & TexCacheEntry::STATUS_TO_SCALE) && g_Config.iTexScalingLevel != 1 && texelsScaledThisFrame_ < TEXCACHE_MAX_TEXELS_SCALED) {
|
||||
// INFO_LOG(G3D, "Reloading texture to do the scaling we skipped..");
|
||||
match = false;
|
||||
}
|
||||
|
||||
if (match) {
|
||||
// TODO: Mark the entry reliable if it's been safe for long enough?
|
||||
//got one!
|
||||
@ -799,7 +1078,8 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
if (entry->texture != lastBoundTexture) {
|
||||
pD3Ddevice->SetTexture(0, entry->texture);
|
||||
lastBoundTexture = entry->texture;
|
||||
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
|
||||
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
|
||||
gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN;
|
||||
}
|
||||
UpdateSamplingParams(*entry, false);
|
||||
VERBOSE_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr);
|
||||
@ -820,8 +1100,20 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
entry->ReleaseTexture();
|
||||
}
|
||||
}
|
||||
if (entry->status == TexCacheEntry::STATUS_RELIABLE) {
|
||||
entry->status = TexCacheEntry::STATUS_HASHING;
|
||||
// Clear the reliable bit if set.
|
||||
if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
|
||||
entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
|
||||
}
|
||||
|
||||
// Also, mark any textures with the same address but different clut. They need rechecking.
|
||||
if (cluthash != 0) {
|
||||
const u64 cachekeyMin = (u64)(texaddr & 0x3FFFFFFF) << 32;
|
||||
const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
|
||||
for (auto it = cache.lower_bound(cachekeyMin), end = cache.upper_bound(cachekeyMax); it != end; ++it) {
|
||||
if (it->second.cluthash != cluthash) {
|
||||
it->second.status |= TexCacheEntry::STATUS_CLUT_RECHECK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -848,6 +1140,7 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
entry->lastFrame = gpuStats.numFlips;
|
||||
entry->framebuffer = 0;
|
||||
entry->maxLevel = maxLevel;
|
||||
entry->lodBias = 0.0f;
|
||||
|
||||
entry->dim = gstate.getTextureDimension(0);
|
||||
entry->bufw = bufw;
|
||||
@ -870,29 +1163,19 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
// Before we go reading the texture from memory, let's check for render-to-texture.
|
||||
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
|
||||
auto framebuffer = fbCache_[i];
|
||||
// This is a rough heuristic, because sometimes our framebuffers are too tall.
|
||||
static const u32 MAX_SUBAREA_Y_OFFSET = 32;
|
||||
|
||||
// Must be in VRAM so | 0x04000000 it is.
|
||||
const u64 cacheKeyStart = (u64)(framebuffer->fb_address | 0x04000000) << 32;
|
||||
// If it has a clut, those are the low 32 bits, so it'll be inside this range.
|
||||
// Also, if it's a subsample of the buffer, it'll also be within the FBO.
|
||||
const u64 cacheKeyEnd = cacheKeyStart + ((u64)(framebuffer->fb_stride * MAX_SUBAREA_Y_OFFSET) << 32);
|
||||
|
||||
if (cachekey >= cacheKeyStart && cachekey < cacheKeyEnd) {
|
||||
AttachFramebuffer(entry, framebuffer->fb_address | 0x04000000, framebuffer, cachekey == cacheKeyStart);
|
||||
}
|
||||
AttachFramebuffer(entry, framebuffer->fb_address, framebuffer);
|
||||
}
|
||||
|
||||
// If we ended up with a framebuffer, attach it - no texture decoding needed.
|
||||
if (entry->framebuffer) {
|
||||
SetTextureFramebuffer(entry);
|
||||
SetTextureFramebuffer(entry, entry->framebuffer);
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
entry->lastFrame = gpuStats.numFlips;
|
||||
return;
|
||||
}
|
||||
|
||||
// Adjust maxLevel to actually present levels..
|
||||
bool badMipSizes = false;
|
||||
for (int i = 0; i <= maxLevel; i++) {
|
||||
// If encountering levels pointing to nothing, adjust max level.
|
||||
u32 levelTexaddr = gstate.getTextureAddress(i);
|
||||
@ -900,10 +1183,57 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
maxLevel = i - 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
int tw = gstate.getTextureWidth(i);
|
||||
int th = gstate.getTextureHeight(i);
|
||||
if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1))
|
||||
badMipSizes = true;
|
||||
else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1))
|
||||
badMipSizes = true;
|
||||
}
|
||||
}
|
||||
|
||||
LoadTextureLevel(*entry, 0, replaceImages);
|
||||
// In addition, simply don't load more than level 0 if g_Config.bMipMap is false.
|
||||
if (!g_Config.bMipMap) {
|
||||
maxLevel = 0;
|
||||
}
|
||||
|
||||
// If GLES3 is available, we can preallocate the storage, which makes texture loading more efficient.
|
||||
D3DFORMAT dstFmt = GetDestFormat(format, gstate.getClutPaletteFormat());
|
||||
|
||||
int scaleFactor;
|
||||
// Auto texture scaling: follow the rendering resolution, capped at 4x (3x falls back to 2x).
|
||||
if (g_Config.iTexScalingLevel == 0) {
|
||||
scaleFactor = g_Config.iInternalResolution;
|
||||
if (scaleFactor == 0) {
|
||||
scaleFactor = (PSP_CoreParameter().renderWidth + 479) / 480;
|
||||
}
|
||||
|
||||
scaleFactor = std::min(4, scaleFactor);
|
||||
if (scaleFactor == 3) {
|
||||
scaleFactor = 2;
|
||||
}
|
||||
} else {
|
||||
scaleFactor = g_Config.iTexScalingLevel;
|
||||
}
|
||||
|
||||
// Don't scale the PPGe texture.
|
||||
if (entry->addr > 0x05000000 && entry->addr < 0x08800000)
|
||||
scaleFactor = 1;
|
||||
|
||||
if (scaleFactor != 1 && (entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
|
||||
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
|
||||
scaleFactor = 1;
|
||||
// INFO_LOG(G3D, "Skipped scaling for now..");
|
||||
} else {
|
||||
entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
|
||||
texelsScaledThisFrame_ += w * h;
|
||||
}
|
||||
}
|
||||
|
||||
LoadTextureLevel(*entry, 0, replaceImages, scaleFactor, dstFmt);
|
||||
pD3Ddevice->SetTexture(0, entry->texture);
|
||||
lastBoundTexture = entry->texture;
|
||||
|
||||
@ -911,15 +1241,42 @@ void TextureCacheDX9::SetTexture(bool force) {
|
||||
//glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, anisotropyLevel);
|
||||
pD3Ddevice->SetSamplerState(0, D3DSAMP_MAXANISOTROPY, anisotropyLevel);
|
||||
|
||||
UpdateSamplingParams(*entry, true);
|
||||
gstate_c.textureFullAlpha = entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL;
|
||||
gstate_c.textureSimpleAlpha = entry->GetAlphaStatus() != TexCacheEntry::STATUS_ALPHA_UNKNOWN;
|
||||
|
||||
gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
|
||||
UpdateSamplingParams(*entry, true);
|
||||
}
|
||||
|
||||
// Maps a GE texture format to the Direct3D format used for the decoded data.
// Palettized (CLUT) formats take their destination format from the palette
// format; the 16-bit formats map to their matching 16-bit D3D formats; anything
// else (8888, DXT1/3/5 and unrecognized values) becomes D3DFMT_A8R8G8B8.
D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
	const bool paletted = format == GE_TFMT_CLUT4 || format == GE_TFMT_CLUT8 ||
		format == GE_TFMT_CLUT16 || format == GE_TFMT_CLUT32;
	if (paletted)
		return getClutDestFormat(clutFormat);

	if (format == GE_TFMT_4444)
		return D3DFMT_A4R4G4B4;
	if (format == GE_TFMT_5551)
		return D3DFMT_A1R5G5B5;
	if (format == GE_TFMT_5650)
		return D3DFMT_R5G6B5;

	// GE_TFMT_8888, GE_TFMT_DXT1, GE_TFMT_DXT3, GE_TFMT_DXT5 and everything else.
	return D3DFMT_A8R8G8B8;
}
|
||||
|
||||
void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt, int *bufwout) {
|
||||
void *finalBuf = NULL;
|
||||
|
||||
u32 texaddr = gstate.getTextureAddress(level);
|
||||
if (texaddr & 0x00600000 && Memory::IsVRAMAddress(texaddr)) {
|
||||
// This means it's in a mirror, possibly a swizzled mirror. Let's report.
|
||||
WARN_LOG_REPORT_ONCE(texmirror, G3D, "Decoding texture from VRAM mirror at %08x swizzle=%d", texaddr, gstate.isTextureSwizzled() ? 1 : 0);
|
||||
}
|
||||
|
||||
int bufw = GetTextureBufw(level, texaddr, format);
|
||||
if (bufwout)
|
||||
@ -931,8 +1288,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
switch (format) {
|
||||
case GE_TFMT_CLUT4:
|
||||
{
|
||||
dstFmt = getClutDestFormat(clutformat);
|
||||
|
||||
const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
|
||||
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
|
||||
|
||||
@ -990,19 +1345,16 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
break;
|
||||
|
||||
case GE_TFMT_CLUT8:
|
||||
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
|
||||
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
|
||||
finalBuf = ReadIndexedTex(level, texptr, 1, dstFmt, bufw);
|
||||
break;
|
||||
|
||||
case GE_TFMT_CLUT16:
|
||||
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
|
||||
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
|
||||
finalBuf = ReadIndexedTex(level, texptr, 2, dstFmt, bufw);
|
||||
break;
|
||||
|
||||
case GE_TFMT_CLUT32:
|
||||
dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());
|
||||
texByteAlign = texByteAlignMap[gstate.getClutPaletteFormat()];
|
||||
finalBuf = ReadIndexedTex(level, texptr, 4, dstFmt, bufw);
|
||||
break;
|
||||
@ -1010,12 +1362,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
case GE_TFMT_4444:
|
||||
case GE_TFMT_5551:
|
||||
case GE_TFMT_5650:
|
||||
if (format == GE_TFMT_4444)
|
||||
dstFmt = D3DFMT_A4R4G4B4;
|
||||
else if (format == GE_TFMT_5551)
|
||||
dstFmt = D3DFMT_A1R5G5B5;
|
||||
else if (format == GE_TFMT_5650)
|
||||
dstFmt = D3DFMT_R5G6B5;
|
||||
texByteAlign = 2;
|
||||
|
||||
if (!gstate.isTextureSwizzled()) {
|
||||
@ -1032,7 +1378,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
break;
|
||||
|
||||
case GE_TFMT_8888:
|
||||
dstFmt = D3DFMT_A8R8G8B8;
|
||||
if (!gstate.isTextureSwizzled()) {
|
||||
// Special case: if we don't need to deal with packing, we don't need to copy.
|
||||
//if (w == bufw) {
|
||||
@ -1045,15 +1390,13 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
Memory::Memcpy(tmpTexBuf32.data(), texaddr, len * sizeof(u32));
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
finalBuf = UnswizzleFromMem(texptr, bufw, 4, level);
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_TFMT_DXT1:
|
||||
dstFmt = D3DFMT_A8R8G8B8;
|
||||
{
|
||||
int minw = std::min(bufw, w);
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
@ -1074,7 +1417,6 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
break;
|
||||
|
||||
case GE_TFMT_DXT3:
|
||||
dstFmt = D3DFMT_A8R8G8B8;
|
||||
{
|
||||
int minw = std::min(bufw, w);
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
@ -1094,8 +1436,7 @@ void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteForma
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_TFMT_DXT5: // These work fine now
|
||||
dstFmt = D3DFMT_A8R8G8B8;
|
||||
case GE_TFMT_DXT5:
|
||||
{
|
||||
int minw = std::min(bufw, w);
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
@ -1246,13 +1587,10 @@ static inline void copyTexture(int xoffset, int yoffset, int w, int h, int pitch
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages) {
|
||||
void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, u32 dstFmt) {
|
||||
// TODO: only do this once
|
||||
u32 texByteAlign = 1;
|
||||
|
||||
// TODO: Look into using BGRA for 32-bit textures when the GL_EXT_texture_format_BGRA8888 extension is available, as it's faster than RGBA on some chips.
|
||||
u32 dstFmt = 0;
|
||||
|
||||
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
|
||||
int bufw;
|
||||
void *finalBuf = DecodeTextureLevel(GETextureFormat(entry.format), clutformat, level, texByteAlign, dstFmt, &bufw);
|
||||
@ -1266,26 +1604,19 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool rep
|
||||
gpuStats.numTexturesDecoded++;
|
||||
|
||||
u32 *pixelData = (u32 *)finalBuf;
|
||||
|
||||
int scaleFactor = g_Config.iTexScalingLevel;
|
||||
|
||||
// Don't scale the PPGe texture.
|
||||
if (entry.addr > 0x05000000 && entry.addr < 0x08800000)
|
||||
scaleFactor = 1;
|
||||
|
||||
if (scaleFactor > 1 && entry.numInvalidated == 0)
|
||||
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
|
||||
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
|
||||
// Or always?
|
||||
if (entry.numInvalidated == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, bufw, w, h);
|
||||
entry.status = ((entry.status & ~TexCacheEntry::STATUS_ALPHA_MASK) | alphaStatus);
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, w, w, h);
|
||||
entry.SetAlphaStatus(alphaStatus, level);
|
||||
} else {
|
||||
entry.status |= TexCacheEntry::STATUS_ALPHA_UNKNOWN;
|
||||
entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
|
||||
}
|
||||
|
||||
// Ignore mip map atm
|
||||
if (level == 0) {
|
||||
if (replaceImages) {
|
||||
if (replaceImages) {
|
||||
// Unset texture
|
||||
pD3Ddevice->SetTexture(0, NULL);
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
namespace DX9 {
|
||||
|
||||
struct VirtualFramebufferDX9;
|
||||
class FramebufferManagerDX9;
|
||||
class ShaderManagerDX9;
|
||||
|
||||
enum TextureFiltering {
|
||||
AUTO = 1,
|
||||
@ -41,13 +43,13 @@ enum FramebufferNotification {
|
||||
NOTIFY_FB_DESTROYED,
|
||||
};
|
||||
|
||||
class TextureCacheDX9
|
||||
{
|
||||
class TextureCacheDX9 {
|
||||
public:
|
||||
TextureCacheDX9();
|
||||
~TextureCacheDX9();
|
||||
|
||||
void SetTexture(bool t = false);
|
||||
void SetTexture(bool force = false);
|
||||
bool SetOffsetTexture(u32 offset);
|
||||
|
||||
void Clear(bool delete_them);
|
||||
void StartFrame();
|
||||
@ -60,6 +62,13 @@ public:
|
||||
// are being rendered to. This is barebones so far.
|
||||
void NotifyFramebuffer(u32 address, VirtualFramebufferDX9 *framebuffer, FramebufferNotification msg);
|
||||
|
||||
// Stores the given framebuffer manager pointer in framebufferManager_.
void SetFramebufferManager(FramebufferManagerDX9 *fbManager) {
|
||||
framebufferManager_ = fbManager;
|
||||
}
|
||||
// Stores the given shader manager pointer in shaderManager_.
void SetShaderManager(ShaderManagerDX9 *sm) {
|
||||
shaderManager_ = sm;
|
||||
}
|
||||
|
||||
// Returns the number of entries in the primary texture cache map
// (entries held in secondCache are not counted).
size_t NumLoadedTextures() const {
|
||||
return cache.size();
|
||||
}
|
||||
@ -67,7 +76,8 @@ public:
|
||||
// Only used by Qt UI?
|
||||
bool DecodeTexture(u8 *output, GPUgstate state);
|
||||
|
||||
private:
|
||||
void ForgetLastTexture();
|
||||
|
||||
// Wow this is starting to grow big. Soon need to start looking at resizing it.
|
||||
// Must stay a POD.
|
||||
struct TexCacheEntry {
|
||||
@ -76,14 +86,19 @@ private:
|
||||
|
||||
// Bit flags stored in a cache entry's status field.
// Bits 0-1 (STATUS_MASK) hold the hash trust state, bits 2-3
// (STATUS_ALPHA_MASK) hold the alpha classification, and the remaining values
// are independent single-bit flags.
enum Status {
	STATUS_HASHING = 0x00,
	STATUS_RELIABLE = 0x01,  // Don't bother rehashing.
	STATUS_UNRELIABLE = 0x02,  // Always recheck hash.
	STATUS_MASK = 0x03,

	STATUS_ALPHA_UNKNOWN = 0x04,
	STATUS_ALPHA_FULL = 0x00,  // Has no alpha channel, or always full alpha.
	STATUS_ALPHA_SIMPLE = 0x08,  // Like above, but also has 0 alpha (e.g. 5551.)
	STATUS_ALPHA_MASK = 0x0c,

	STATUS_CHANGE_FREQUENT = 0x10,  // Changes often (less than 15 frames in between.)
	STATUS_CLUT_RECHECK = 0x20,  // Another texture with same addr had a hashfail.
	STATUS_DEPALETTIZE = 0x40,  // Needs to go through a depalettize pass.
	STATUS_TO_SCALE = 0x80,  // Pending texture scaling in a later frame.
};
|
||||
|
||||
// Status, but int so we can zero initialize.
|
||||
@ -99,12 +114,33 @@ private:
|
||||
u8 format;
|
||||
u16 dim;
|
||||
u16 bufw;
|
||||
LPDIRECT3DTEXTURE9 texture; //GLuint
|
||||
LPDIRECT3DTEXTURE9 texture;
|
||||
int invalidHint;
|
||||
u32 fullhash;
|
||||
u32 cluthash;
|
||||
int maxLevel;
|
||||
float lodBias;
|
||||
|
||||
// Returns only the hash-trust bits (STATUS_MASK) of the status field.
Status GetHashStatus() {
	const auto hashBits = status & STATUS_MASK;
	return static_cast<Status>(hashBits);
}
|
||||
// Replaces the hash-trust bits (STATUS_MASK) of the status field with
// newStatus, leaving all other bits as they were.
void SetHashStatus(Status newStatus) {
	status &= ~STATUS_MASK;
	status |= newStatus;
}
|
||||
// Returns only the alpha classification bits (STATUS_ALPHA_MASK) of the
// status field.
Status GetAlphaStatus() {
	const auto alphaBits = status & STATUS_ALPHA_MASK;
	return static_cast<Status>(alphaBits);
}
|
||||
// Replaces the alpha classification bits (STATUS_ALPHA_MASK) of the status
// field with newStatus, leaving all other bits as they were.
void SetAlphaStatus(Status newStatus) {
	status &= ~STATUS_ALPHA_MASK;
	status |= newStatus;
}
|
||||
// Level-aware variant of SetAlphaStatus.
// Level 0, or an UNKNOWN result at any level, overwrites the stored alpha
// status.  For other levels the only change made is FULL -> SIMPLE; any other
// combination leaves the stored status untouched.
void SetAlphaStatus(Status newStatus, int level) {
	const bool replaceOutright = level == 0 || newStatus == STATUS_ALPHA_UNKNOWN;
	if (replaceOutright) {
		SetAlphaStatus(newStatus);
		return;
	}

	const bool downgradeToSimple = newStatus == STATUS_ALPHA_SIMPLE && GetAlphaStatus() == STATUS_ALPHA_FULL;
	if (downgradeToSimple) {
		SetAlphaStatus(STATUS_ALPHA_SIMPLE);
	}
}
|
||||
bool Matches(u16 dim2, u8 format2, int maxLevel2);
|
||||
void ReleaseTexture() {
|
||||
if (texture) {
|
||||
@ -113,28 +149,44 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
|
||||
|
||||
private:
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
|
||||
void Decimate(); // Run this once per frame to get rid of old textures.
|
||||
void DeleteTexture(TexCache::iterator it);
|
||||
void *UnswizzleFromMem(const u8 *texptr, u32 bufw, u32 bytesPerPixel, u32 level);
|
||||
void *ReadIndexedTex(int level, const u8 *texptr, int bytesPerIndex, u32 dstFmt, int bufw);
|
||||
void GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, int maxLevel);
|
||||
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages, int scaleFactor, u32 dstFmt);
|
||||
D3DFORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
|
||||
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt, int *bufw = 0);
|
||||
TexCacheEntry::Status CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
|
||||
template <typename T>
|
||||
const T *GetCurrentClut();
|
||||
u32 GetCurrentClutHash();
|
||||
void UpdateCurrentClut();
|
||||
void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, bool exactMatch);
|
||||
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer, u32 texaddrOffset = 0);
|
||||
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebufferDX9 *framebuffer);
|
||||
void SetTextureFramebuffer(TexCacheEntry *entry);
|
||||
void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer);
|
||||
|
||||
TexCacheEntry *GetEntryAt(u32 texaddr);
|
||||
|
||||
typedef std::map<u64, TexCacheEntry> TexCache;
|
||||
TexCache cache;
|
||||
TexCache secondCache;
|
||||
std::vector<VirtualFramebufferDX9 *> fbCache_;
|
||||
|
||||
// Separate to keep main texture cache size down.
|
||||
// X/Y offset recorded for a texture that has a framebuffer attached.
// Instances are kept in fbTexInfo_, keyed by a u32; SetTextureFramebuffer
// looks them up by the entry's address and copies them into
// gstate_c.curTextureXOffset / curTextureYOffset.
struct AttachedFramebufferInfo {
|
||||
u32 xOffset;
|
||||
u32 yOffset;
|
||||
};
|
||||
std::map<u32, AttachedFramebufferInfo> fbTexInfo_;
|
||||
void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo);
|
||||
void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebufferDX9 *framebuffer, const AttachedFramebufferInfo &fbInfo);
|
||||
|
||||
bool clearCacheNextFrame_;
|
||||
bool lowMemoryMode_;
|
||||
TextureScalerDX9 scaler;
|
||||
@ -150,6 +202,7 @@ private:
|
||||
u32 *clutBuf_;
|
||||
u32 clutHash_;
|
||||
u32 clutTotalBytes_;
|
||||
u32 clutMaxBytes_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
bool clutAlphaLinear_;
|
||||
u16 clutAlphaLinearColor_;
|
||||
@ -158,6 +211,13 @@ private:
|
||||
float maxAnisotropyLevel;
|
||||
|
||||
int decimationCounter_;
|
||||
int texelsScaledThisFrame_;
|
||||
int timesInvalidatedAllThisFrame_;
|
||||
|
||||
FramebufferManagerDX9 *framebufferManager_;
|
||||
ShaderManagerDX9 *shaderManager_;
|
||||
};
|
||||
|
||||
D3DFORMAT getClutDestFormat(GEPaletteFormat format);
|
||||
|
||||
};
|
||||
|
@ -61,10 +61,10 @@ namespace {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
u32 r = ((val>> 0) & 0xF) * 17;
|
||||
u32 g = ((val>> 4) & 0xF) * 17;
|
||||
u32 b = ((val>> 8) & 0xF) * 17;
|
||||
u32 a = ((val>>12) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
@ -75,9 +75,9 @@ namespace {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 r = Convert5To8((val ) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val ) & 0x1F);
|
||||
u32 b = Convert5To8((val>>11) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
@ -88,10 +88,10 @@ namespace {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 6) & 0x1F);
|
||||
u32 b = Convert5To8((val>> 1) & 0x1F);
|
||||
u32 a = (val & 0x1) * 255;
|
||||
u32 r = Convert5To8((val>> 0) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 5) & 0x1F);
|
||||
u32 b = Convert5To8((val>>10) & 0x1F);
|
||||
u32 a = ((val >> 15) & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
|
@ -1092,6 +1092,8 @@ void TransformDrawEngineDX9::DoFlush() {
|
||||
vai->numVerts = indexGen.VertexCount();
|
||||
vai->prim = indexGen.Prim();
|
||||
vai->maxIndex = indexGen.MaxIndex();
|
||||
vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0;
|
||||
|
||||
goto rotateVBO;
|
||||
}
|
||||
|
||||
@ -1167,6 +1169,7 @@ void TransformDrawEngineDX9::DoFlush() {
|
||||
gpuStats.numCachedDrawCalls++;
|
||||
useElements = vai->ebo ? true : false;
|
||||
gpuStats.numCachedVertsDrawn += vai->numVerts;
|
||||
gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
|
||||
}
|
||||
vb_ = vai->vbo;
|
||||
ib_ = vai->ebo;
|
||||
@ -1192,6 +1195,8 @@ void TransformDrawEngineDX9::DoFlush() {
|
||||
|
||||
maxIndex = vai->maxIndex;
|
||||
prim = static_cast<GEPrimitiveType>(vai->prim);
|
||||
|
||||
gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1221,6 +1226,12 @@ rotateVBO:
|
||||
}
|
||||
|
||||
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
|
||||
} else {
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
|
||||
}
|
||||
|
||||
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), dec_->VertexType());
|
||||
|
||||
@ -1246,6 +1257,13 @@ rotateVBO:
|
||||
}
|
||||
} else {
|
||||
DecodeVerts();
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
|
||||
} else {
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
|
||||
}
|
||||
|
||||
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
|
||||
prim = indexGen.Prim();
|
||||
// Undo the strip optimization, not supported by the SW code yet.
|
||||
@ -1264,6 +1282,7 @@ rotateVBO:
|
||||
numDrawCalls = 0;
|
||||
vertexCountInDrawCalls = 0;
|
||||
prevPrim_ = GE_PRIM_INVALID;
|
||||
gstate_c.vertexFullAlpha = true;
|
||||
|
||||
host->GPUNotifyDraw();
|
||||
}
|
||||
|
@ -44,6 +44,10 @@ class FramebufferManagerDX9;
|
||||
// DRAWN_ONCE -> death
|
||||
// DRAWN_RELIABLE -> death
|
||||
|
||||
// Bit flags kept in VertexArrayInfoDX9::flags.
// VAI_FLAG_VERTEXFULLALPHA records the value of gstate_c.vertexFullAlpha at the
// time DoFlush() cached the vertex array, so it can be copied back into
// gstate_c.vertexFullAlpha when the cached array is drawn again.
enum {
|
||||
VAI_FLAG_VERTEXFULLALPHA = 1,
|
||||
};
|
||||
|
||||
|
||||
// Don't bother storing information about draws smaller than this.
|
||||
enum {
|
||||
@ -64,8 +68,10 @@ public:
|
||||
lastFrame = gpuStats.numFlips;
|
||||
numVerts = 0;
|
||||
drawsUntilNextFullHash = 0;
|
||||
flags = 0;
|
||||
}
|
||||
~VertexArrayInfoDX9();
|
||||
|
||||
enum Status {
|
||||
VAI_NEW,
|
||||
VAI_HASHING,
|
||||
@ -80,7 +86,6 @@ public:
|
||||
LPDIRECT3DVERTEXBUFFER9 vbo;
|
||||
LPDIRECT3DINDEXBUFFER9 ebo;
|
||||
|
||||
|
||||
// Precalculated parameter for drawRangeElements
|
||||
u16 numVerts;
|
||||
u16 maxIndex;
|
||||
@ -92,9 +97,9 @@ public:
|
||||
int numFrames;
|
||||
int lastFrame; // So that we can forget.
|
||||
u16 drawsUntilNextFullHash;
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class TransformDrawEngineDX9 {
|
||||
public:
|
||||
|
@ -231,6 +231,7 @@ void VertexDecoderDX9::Step_Color565() const
|
||||
c[1] = Convert6To8((cdata>>5) & 0x3f);
|
||||
c[2] = Convert5To8((cdata>>11) & 0x1f);
|
||||
c[3] = 255;
|
||||
// Always full alpha.
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color5551() const
|
||||
@ -241,6 +242,7 @@ void VertexDecoderDX9::Step_Color5551() const
|
||||
c[1] = Convert5To8((cdata>>5) & 0x1f);
|
||||
c[2] = Convert5To8((cdata>>10) & 0x1f);
|
||||
c[3] = (cdata >> 15) ? 255 : 0;
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color4444() const
|
||||
@ -251,6 +253,7 @@ void VertexDecoderDX9::Step_Color4444() const
|
||||
c[1] = Convert4To8((cdata >> (4)) & 0xF);
|
||||
c[2] = Convert4To8((cdata >> (8)) & 0xF);
|
||||
c[3] = Convert4To8((cdata >> (12)) & 0xF);
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color8888() const
|
||||
@ -261,6 +264,7 @@ void VertexDecoderDX9::Step_Color8888() const
|
||||
c[1] = cdata[1];
|
||||
c[2] = cdata[2];
|
||||
c[3] = cdata[3];
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color565Morph() const
|
||||
@ -270,16 +274,16 @@ void VertexDecoderDX9::Step_Color565Morph() const
|
||||
{
|
||||
float w = gstate_c.morphWeights[n];
|
||||
u16 cdata = (u16)(*(u16_le*)(ptr_ + onesize_*n + coloff));
|
||||
|
||||
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
|
||||
col[1] += w * ((cdata>>5) & 0x3f) * (255.0f / 63.0f);
|
||||
col[2] += w * ((cdata>>11) & 0x1f) * (255.0f / 31.0f);
|
||||
}
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
c[0] = (u8)col[0];
|
||||
c[1] = (u8)col[1];
|
||||
c[2] = (u8)col[2];
|
||||
for (int i = 0; i < 3; i++) {
|
||||
c[i] = clamp_u8((int)col[i]);
|
||||
}
|
||||
c[3] = 255;
|
||||
// Always full alpha.
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color5551Morph() const
|
||||
@ -295,10 +299,10 @@ void VertexDecoderDX9::Step_Color5551Morph() const
|
||||
col[3] += w * ((cdata>>15) ? 255.0f : 0.0f);
|
||||
}
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
c[0] = (u8)col[0];
|
||||
c[1] = (u8)col[1];
|
||||
c[2] = (u8)col[2];
|
||||
c[3] = (u8)col[3];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = clamp_u8((int)col[i]);
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color4444Morph() const
|
||||
@ -312,10 +316,10 @@ void VertexDecoderDX9::Step_Color4444Morph() const
|
||||
col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f);
|
||||
}
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
c[0] = (u8)col[0];
|
||||
c[1] = (u8)col[1];
|
||||
c[2] = (u8)col[2];
|
||||
c[3] = (u8)col[3];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = clamp_u8((int)col[i]);
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_Color8888Morph() const
|
||||
@ -329,10 +333,10 @@ void VertexDecoderDX9::Step_Color8888Morph() const
|
||||
col[j] += w * cdata[j];
|
||||
}
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
c[0] = (u8)col[0];
|
||||
c[1] = (u8)col[1];
|
||||
c[2] = (u8)col[2];
|
||||
c[3] = (u8)col[3];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = clamp_u8((int)col[i]);
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoderDX9::Step_NormalS8() const
|
||||
|
@ -90,10 +90,10 @@ bool IsAlphaTestTriviallyTrue() {
|
||||
#endif
|
||||
return (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed())) || (
|
||||
(!gstate.isStencilTestEnabled() &&
|
||||
!gstate.isDepthTestEnabled() &&
|
||||
!gstate.isDepthTestEnabled() &&
|
||||
gstate.getAlphaTestRef() == 0 &&
|
||||
gstate.isAlphaBlendEnabled() &&
|
||||
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
|
||||
gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA &&
|
||||
safeDestFactors[(int)gstate.getBlendFuncB()]));
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,7 @@ void TextureCache::Clear(bool delete_them) {
|
||||
cache.clear();
|
||||
secondCache.clear();
|
||||
}
|
||||
fbTexInfo_.clear();
|
||||
}
|
||||
|
||||
void TextureCache::DeleteTexture(TexCache::iterator it) {
|
||||
@ -890,7 +891,7 @@ void TextureCache::LoadClut() {
|
||||
u32 clutAddr = gstate.getClutAddress();
|
||||
if (Memory::IsValidAddress(clutAddr)) {
|
||||
#ifdef _M_SSE
|
||||
int numBlocks = gstate.getClutLoadBlocks();
|
||||
int numBlocks = gstate.getClutLoadBlocks();
|
||||
clutTotalBytes_ = numBlocks * 32;
|
||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||
@ -936,7 +937,7 @@ void TextureCache::UpdateCurrentClut() {
|
||||
clutAlphaLinear_ = false;
|
||||
clutAlphaLinearColor_ = 0;
|
||||
if (gstate.getClutPaletteFormat() == GE_CMODE_16BIT_ABGR4444 && gstate.isClutIndexSimple()) {
|
||||
const u16 *clut = GetCurrentClut<u16>();
|
||||
const u16_le *clut = GetCurrentClut<u16_le>();
|
||||
clutAlphaLinear_ = true;
|
||||
clutAlphaLinearColor_ = clut[15] & 0xFFF0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
@ -1725,8 +1726,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
|
||||
finalBuf = tmpTexBuf32.data();
|
||||
ConvertColors(finalBuf, texptr, dstFmt, bufw * h);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
tmpTexBuf32.resize(std::max(bufw, w) * h);
|
||||
finalBuf = UnswizzleFromMem(texptr, bufw, 4, level);
|
||||
ConvertColors(finalBuf, finalBuf, dstFmt, bufw * h);
|
||||
|
Loading…
x
Reference in New Issue
Block a user