Merge pull request #6920 from hrydgard/dx9-depal

Dx9 depal - needs debugging before merge
This commit is contained in:
Henrik Rydgård 2015-03-01 21:04:36 +01:00
commit c0450f7f9e
16 changed files with 651 additions and 252 deletions

View File

@ -1378,6 +1378,8 @@ if(ARMV7)
set(GPU_NEON GPU/Common/TextureDecoderNEON.cpp)
endif()
add_library(GPU OBJECT
GPU/Common/DepalettizeShaderCommon.cpp
GPU/Common/DepalettizeShaderCommon.h
GPU/Common/FramebufferCommon.cpp
GPU/Common/FramebufferCommon.h
GPU/Common/GPUDebugInterface.h

View File

@ -0,0 +1,259 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <stdio.h>
#include "Common/Log.h"
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#define WRITE p+=sprintf
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
char *p = buffer;
#ifdef USING_GLES2
WRITE(p, "#version 300 es\n");
WRITE(p, "precision mediump float;\n");
#else
WRITE(p, "#version 330\n");
#endif
WRITE(p, "in vec2 v_texcoord0;\n");
WRITE(p, "out vec4 fragColor0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
int mask = gstate.getClutIndexMask();
int shift = gstate.getClutIndexShift();
int offset = gstate.getClutIndexStartPos();
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
// Use the mask to skip reading some components.
int shiftedMask = mask << shift;
switch (pixelFormat) {
case GE_FORMAT_8888:
if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
break;
case GE_FORMAT_4444:
if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
break;
case GE_FORMAT_565:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n");
break;
case GE_FORMAT_5551:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
break;
default:
break;
}
float texturePixels = 256;
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
texturePixels = 512;
if (shift) {
WRITE(p, " index = ((index >> %i) & 0x%02x)", shift, mask);
} else {
WRITE(p, " index = (index & 0x%02x)", mask);
}
if (offset) {
WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h
} else {
WRITE(p, ";\n");
}
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
WRITE(p, "}\n");
}
// FP only, to suit GL(ES) 2.0
void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage lang) {
char *p = buffer;
const char *modFunc = lang == HLSL_DX9 ? "fmod" : "mod";
char lookupMethod[128] = "index.r";
char offset[128] = "";
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const int shift = gstate.getClutIndexShift();
const int mask = gstate.getClutIndexMask();
float index_multiplier = 1.0f;
// pixelformat is the format of the texture we are sampling.
bool formatOK = true;
switch (pixelFormat) {
case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
const u8 rgba_shift = shift & 7;
if (rgba_shift == 0 && mask == 0xFF) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
} else {
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
// Format was OK if there weren't bits from another component.
formatOK = mask <= 255 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_4444:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const char *rgba = "rrrrggggbbbbaaaa";
const u8 rgba_shift = shift & 3;
if (rgba_shift == 0 && mask == 0xF) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 15.0f / 256.0f;
} else {
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 15 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_565:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const u8 shifts[16] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4 };
const int multipliers[16] = { 31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31 };
const char *rgba = "rrrrrggggggbbbbb";
const u8 rgba_shift = shifts[shift];
if (rgba_shift == 0 && mask == multipliers[shift]) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = multipliers[shift] / 256.0f;
} else {
// We just need to divide the right component by the right value, and then mod against the mask.
// A common case is shift=1, mask=0f.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_5551:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const char *rgba = "rrrrrgggggbbbbba";
const u8 rgba_shift = shift % 5;
if (rgba_shift == 0 && mask == 0x1F) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 31.0f / 256.0f;
} else if (shift == 15 && mask == 1) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 1.0f / 256.0f;
} else {
// A isn't possible here.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 31 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
default:
break;
}
float texturePixels = 256.f;
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
index_multiplier *= 0.5f;
}
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
// index_multiplier -= 0.01f / texturePixels;
if (!formatOK) {
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
}
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
sprintf(offset, " + %f", texel_offset);
if (lang == GLSL_140) {
#ifdef USING_GLES2
WRITE(p, "#version 100\n");
WRITE(p, "precision mediump float;\n");
#else
WRITE(p, "#version 110\n");
#endif
WRITE(p, "varying vec2 v_texcoord0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n");
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
WRITE(p, "}\n");
} else if (lang == HLSL_DX9) {
WRITE(p, "sampler tex: register(s0);\n");
WRITE(p, "sampler pal: register(s1);\n");
WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n");
WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n");
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
WRITE(p, " return tex2D(pal, float2(coord, 0.0)).bgra;\n");
WRITE(p, "}\n");
}
}
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
switch (language) {
case GLSL_140:
GenerateDepalShaderFloat(buffer, pixelFormat, language);
break;
case GLSL_300:
GenerateDepalShader300(buffer, pixelFormat);
break;
case HLSL_DX9:
GenerateDepalShaderFloat(buffer, pixelFormat, language);
break;
}
}
#undef WRITE

View File

@ -0,0 +1,28 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "GPU/ge_constants.h"
enum ShaderLanguage {
GLSL_140,
GLSL_300,
HLSL_DX9,
};
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language);

View File

@ -0,0 +1,175 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "base/logging.h"
#include "Common/Log.h"
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/DepalettizeShaderDX9.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#include "GPU/Directx9/helper/global.h"
namespace DX9 {
static const int DEPAL_TEXTURE_OLD_AGE = 120;
#ifdef _WIN32
#define SHADERLOG
#endif
static const char *depalVShaderHLSL =
"struct VS_IN {\n"
" float3 a_position : POSITION;\n"
" float2 a_texcoord0 : TEXCOORD0;\n"
"};\n"
"struct VS_OUT {\n"
" float4 Position : POSITION;\n"
" float2 Texcoord : TEXCOORD0;\n"
"};\n"
"VS_OUT main(VS_IN input) {\n"
" VS_OUT output;\n"
" output.Texcoord = input.a_texcoord0;\n"
" output.Position = float4(input.a_position, 1.0);\n"
" return output;\n"
"}\n";
DepalShaderCacheDX9::DepalShaderCacheDX9() : vertexShader_(nullptr) {
std::string errorMessage;
if (!DX9::CompileVertexShader(depalVShaderHLSL, &vertexShader_, nullptr, errorMessage)) {
ERROR_LOG(G3D, "error compling depal vshader: %s", errorMessage.c_str());
}
}
DepalShaderCacheDX9::~DepalShaderCacheDX9() {
Clear();
if (vertexShader_) {
vertexShader_->Release();
}
}
u32 DepalShaderCacheDX9::GenerateShaderID(GEBufferFormat pixelFormat) {
return (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24);
}
LPDIRECT3DTEXTURE9 DepalShaderCacheDX9::GetClutTexture(const u32 clutID, u32 *rawClut) {
GEPaletteFormat palFormat = gstate.getClutPaletteFormat();
const u32 realClutID = clutID ^ palFormat;
auto oldtex = texCache_.find(realClutID);
if (oldtex != texCache_.end()) {
oldtex->second->lastFrame = gpuStats.numFlips;
return oldtex->second->texture;
}
D3DFORMAT dstFmt = DX9::getClutDestFormat(palFormat);
int texturePixels = palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;
DepalTextureDX9 *tex = new DepalTextureDX9();
// Create texture
D3DPOOL pool = D3DPOOL_MANAGED;
int usage = 0;
if (pD3DdeviceEx) {
pool = D3DPOOL_DEFAULT;
usage = D3DUSAGE_DYNAMIC; // TODO: Switch to using a staging texture?
}
HRESULT hr = pD3Ddevice->CreateTexture(texturePixels, 1, 1, usage, (D3DFORMAT)D3DFMT(dstFmt), pool, &tex->texture, NULL);
if (FAILED(hr)) {
ERROR_LOG(G3D, "Failed to create D3D texture for depal");
return nullptr;
}
D3DLOCKED_RECT rect;
hr = tex->texture->LockRect(0, &rect, NULL, 0);
if (FAILED(hr)) {
ERROR_LOG(G3D, "Failed to lock D3D texture for depal");
return nullptr;
}
// Regardless of format, the CLUT should always be 1024 bytes.
memcpy(rect.pBits, rawClut, 1024);
tex->texture->UnlockRect(0);
pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
tex->lastFrame = gpuStats.numFlips;
texCache_[realClutID] = tex;
return tex->texture;
}
void DepalShaderCacheDX9::Clear() {
for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) {
shader->second->pixelShader->Release();
delete shader->second;
}
cache_.clear();
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
tex->second->texture->Release();
delete tex->second;
}
texCache_.clear();
}
void DepalShaderCacheDX9::Decimate() {
for (auto tex = texCache_.begin(); tex != texCache_.end();) {
if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
tex->second->texture->Release();
delete tex->second;
texCache_.erase(tex++);
} else {
++tex;
}
}
}
LPDIRECT3DPIXELSHADER9 DepalShaderCacheDX9::GetDepalettizePixelShader(GEBufferFormat pixelFormat) {
u32 id = GenerateShaderID(pixelFormat);
auto shader = cache_.find(id);
if (shader != cache_.end()) {
return shader->second->pixelShader;
}
char *buffer = new char[2048];
GenerateDepalShader(buffer, pixelFormat, HLSL_DX9);
LPDIRECT3DPIXELSHADER9 pshader;
std::string errorMessage;
if (!CompilePixelShader(buffer, &pshader, NULL, errorMessage)) {
ERROR_LOG(G3D, "Failed to compile depal pixel shader: %s\n\n%s", buffer, errorMessage.c_str());
delete[] buffer;
return nullptr;
}
DepalShaderDX9 *depal = new DepalShaderDX9();
depal->pixelShader = pshader;
cache_[id] = depal;
delete[] buffer;
return depal->pixelShader;
}
} // namespace

View File

@ -0,0 +1,59 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "Common/CommonTypes.h"
#include "GPU/ge_constants.h"
#include "GPU/Directx9/helper/global.h"
namespace DX9 {
class DepalShaderDX9 {
public:
LPDIRECT3DPIXELSHADER9 pixelShader;
};
class DepalTextureDX9 {
public:
LPDIRECT3DTEXTURE9 texture;
int lastFrame;
};
// Caches both shaders and palette textures.
class DepalShaderCacheDX9 {
public:
DepalShaderCacheDX9();
~DepalShaderCacheDX9();
// This also uploads the palette and binds the correct texture.
LPDIRECT3DPIXELSHADER9 GetDepalettizePixelShader(GEBufferFormat pixelFormat);
LPDIRECT3DVERTEXSHADER9 GetDepalettizeVertexShader() { return vertexShader_; }
LPDIRECT3DTEXTURE9 GetClutTexture(const u32 clutHash, u32 *rawClut);
void Clear();
void Decimate();
private:
u32 GenerateShaderID(GEBufferFormat pixelFormat);
LPDIRECT3DVERTEXSHADER9 vertexShader_;
std::map<u32, DepalShaderDX9 *> cache_;
std::map<u32, DepalTextureDX9 *> texCache_;
};
} // namespace

View File

@ -63,33 +63,33 @@ namespace DX9 {
void CenterRect(float *x, float *y, float *w, float *h,
float origW, float origH, float frameW, float frameH) {
if (g_Config.bStretchToDisplay) {
*x = 0;
*y = 0;
*w = frameW;
*h = frameH;
return;
*x = 0;
*y = 0;
*w = frameW;
*h = frameH;
return;
}
float origRatio = origW/origH;
float frameRatio = frameW/frameH;
if (origRatio > frameRatio) {
// Image is wider than frame. Center vertically.
float scale = origW / frameW;
*x = 0.0f;
*w = frameW;
*h = frameW / origRatio;
// Stretch a little bit
if (g_Config.bPartialStretch)
*h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564
*y = (frameH - *h) / 2.0f;
// Image is wider than frame. Center vertically.
float scale = origW / frameW;
*x = 0.0f;
*w = frameW;
*h = frameW / origRatio;
// Stretch a little bit
if (g_Config.bPartialStretch)
*h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564
*y = (frameH - *h) / 2.0f;
} else {
// Image is taller than frame. Center horizontally.
float scale = origH / frameH;
*y = 0.0f;
*h = frameH;
*w = frameH * origRatio;
*x = (frameW - *w) / 2.0f;
// Image is taller than frame. Center horizontally.
float scale = origH / frameH;
*y = 0.0f;
*h = frameH;
*w = frameH * origRatio;
*x = (frameW - *w) / 2.0f;
}
}
@ -202,8 +202,6 @@ namespace DX9 {
convBuf = (u8*)rect.pBits;
// Final format is BGRA(directx)
// TODO: We can just change the texture format and flip some bits around instead of this.
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) {
for (int y = 0; y < height; y++) {
switch (srcPixelFormat) {

View File

@ -73,7 +73,7 @@ public:
void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst);
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy);
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy = false);
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;

View File

@ -403,6 +403,7 @@ DIRECTX9_GPU::DIRECTX9_GPU()
framebufferManager_.SetShaderManager(shaderManager_);
framebufferManager_.SetTransformDrawEngine(&transformDraw_);
textureCache_.SetFramebufferManager(&framebufferManager_);
textureCache_.SetDepalShaderCache(&depalShaderCache_);
textureCache_.SetShaderManager(shaderManager_);
// Sanity check gstate
@ -525,7 +526,7 @@ void DIRECTX9_GPU::BeginFrameInternal() {
textureCache_.StartFrame();
transformDraw_.DecimateTrackedVertexArrays();
// depalShaderCache_.Decimate();
depalShaderCache_.Decimate();
// fragmentTestCache_.Decimate();
if (dumpNextFrame_) {

View File

@ -24,6 +24,7 @@
#include "GPU/Directx9/FramebufferDX9.h"
#include "GPU/Directx9/TransformPipelineDX9.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/DepalettizeShaderDX9.h"
#include "GPU/Directx9/helper/fbo.h"
#include "GPU/Common/VertexDecoderCommon.h"
@ -167,6 +168,7 @@ private:
FramebufferManagerDX9 framebufferManager_;
TextureCacheDX9 textureCache_;
DepalShaderCacheDX9 depalShaderCache_;
TransformDrawEngineDX9 transformDraw_;
ShaderManagerDX9 *shaderManager_;

View File

@ -26,6 +26,8 @@
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
#include "GPU/Directx9/TextureCacheDX9.h"
#include "GPU/Directx9/FramebufferDX9.h"
#include "GPU/Directx9/ShaderManagerDX9.h"
#include "GPU/Directx9/DepalettizeShaderDX9.h"
#include "GPU/Directx9/helper/dx_state.h"
#include "GPU/Common/FramebufferCommon.h"
#include "GPU/Common/TextureDecoder.h"
@ -897,21 +899,84 @@ void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebu
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
if (useBufferedRendering) {
// TODO: Depal
// For now, let's not bind FBOs that we know are off (invalidHint will be -1.)
// But let's still not use random memory.
if (entry->framebuffer->fbo) {
fbo_bind_color_as_texture(entry->framebuffer->fbo, 0);
// Keep the framebuffer alive.
// TODO: Dangerous if it sets a new one?
entry->framebuffer->last_frame_used = gpuStats.numFlips;
} else {
pD3Ddevice->SetTexture(0, NULL);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
LPDIRECT3DPIXELSHADER9 pshader = nullptr;
if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) {
pshader = depalShaderCache_->GetDepalettizePixelShader(framebuffer->drawnFormat);
}
gstate_c.textureFullAlpha = gstate.getTextureFormat() == GE_TFMT_5650;
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
if (pshader) {
LPDIRECT3DTEXTURE9 clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBuf_);
FBO *depalFBO = framebufferManager_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, FBO_8888);
fbo_bind_as_render_target(depalFBO);
float xoff = -0.5f / framebuffer->renderWidth;
float yoff = 0.5f / framebuffer->renderHeight;
const float pos[12 + 8] = {
-1 + xoff, 1 + yoff, 0, 0, 0,
1 + xoff, 1 + yoff, 0, 1, 0,
1 + xoff, -1 + yoff, 0, 1, 1,
-1 + xoff, -1 + yoff, 0, 0, 1,
};
shaderManager_->DirtyLastShader();
pD3Ddevice->SetPixelShader(pshader);
pD3Ddevice->SetVertexShader(depalShaderCache_->GetDepalettizeVertexShader());
pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl);
pD3Ddevice->SetTexture(1, clutTexture);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE);
framebufferManager_->BindFramebufferColor(0, framebuffer, true);
pD3Ddevice->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(0, D3DSAMP_MIPFILTER, D3DTEXF_NONE);
pD3Ddevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
pD3Ddevice->SetRenderState(D3DRS_SEPARATEALPHABLENDENABLE, FALSE);
pD3Ddevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA);
pD3Ddevice->SetRenderState(D3DRS_ZENABLE, FALSE);
pD3Ddevice->SetRenderState(D3DRS_STENCILENABLE, FALSE);
pD3Ddevice->SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE);
pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
D3DVIEWPORT9 vp;
vp.MinZ = 0;
vp.MaxZ = 1;
vp.X = 0;
vp.Y = 0;
vp.Width = framebuffer->renderWidth;
vp.Height = framebuffer->renderHeight;
pD3Ddevice->SetViewport(&vp);
HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, pos, (3 + 2) * sizeof(float));
if (FAILED(hr)) {
ERROR_LOG_REPORT(G3D, "Depal render failed: %08x", hr);
}
framebufferManager_->RebindFramebuffer();
fbo_bind_color_as_texture(depalFBO, 0);
dxstate.Restore();
dxstate.viewport.restore();
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutExtendedColors = (clutTotalBytes_ / bytesPerColor) + clutBase;
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutExtendedColors, clutExtendedColors, 1);
gstate_c.textureFullAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureSimpleAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE;
} else {
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
framebufferManager_->BindFramebufferColor(0, framebuffer);
gstate_c.textureFullAlpha = gstate.getTextureFormat() == GE_TFMT_5650;
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
}
// Keep the framebuffer alive.
framebuffer->last_frame_used = gpuStats.numFlips;

View File

@ -32,6 +32,7 @@ struct VirtualFramebuffer;
namespace DX9 {
class FramebufferManagerDX9;
class DepalShaderCacheDX9;
class ShaderManagerDX9;
enum TextureFiltering {
@ -69,6 +70,9 @@ public:
void SetFramebufferManager(FramebufferManagerDX9 *fbManager) {
framebufferManager_ = fbManager;
}
void SetDepalShaderCache(DepalShaderCacheDX9 *dpCache) {
depalShaderCache_ = dpCache;
}
void SetShaderManager(ShaderManagerDX9 *sm) {
shaderManager_ = sm;
}
@ -223,6 +227,7 @@ private:
int timesInvalidatedAllThisFrame_;
FramebufferManagerDX9 *framebufferManager_;
DepalShaderCacheDX9 *depalShaderCache_;
ShaderManagerDX9 *shaderManager_;
};

View File

@ -23,6 +23,7 @@
#include "DepalettizeShader.h"
#include "GPU/GPUState.h"
#include "GPU/GLES/TextureCache.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
static const int DEPAL_TEXTURE_OLD_AGE = 120;
@ -107,216 +108,6 @@ DepalShaderCache::~DepalShaderCache() {
glDeleteShader(vertexShader_);
}
#define WRITE p+=sprintf
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
char *p = buffer;
#ifdef USING_GLES2
WRITE(p, "#version 300 es\n");
WRITE(p, "precision mediump float;\n");
#else
WRITE(p, "#version 330\n");
#endif
WRITE(p, "in vec2 v_texcoord0;\n");
WRITE(p, "out vec4 fragColor0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
int mask = gstate.getClutIndexMask();
int shift = gstate.getClutIndexShift();
int offset = gstate.getClutIndexStartPos();
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
// Use the mask to skip reading some components.
int shiftedMask = mask << shift;
switch (pixelFormat) {
case GE_FORMAT_8888:
if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
break;
case GE_FORMAT_4444:
if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
break;
case GE_FORMAT_565:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n");
break;
case GE_FORMAT_5551:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
break;
default:
break;
}
float texturePixels = 256;
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
texturePixels = 512;
if (shift) {
WRITE(p, " index = ((index >> %i) & 0x%02x)", shift, mask);
} else {
WRITE(p, " index = (index & 0x%02x)", mask);
}
if (offset) {
WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h
} else {
WRITE(p, ";\n");
}
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
WRITE(p, "}\n");
}
// FP only, to suit GL(ES) 2.0
void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) {
char *p = buffer;
char lookupMethod[128] = "index.r";
char offset[128] = "";
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const int shift = gstate.getClutIndexShift();
const int mask = gstate.getClutIndexMask();
float index_multiplier = 1.0f;
// pixelformat is the format of the texture we are sampling.
bool formatOK = true;
switch (pixelFormat) {
case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
const u8 rgba_shift = shift & 7;
if (rgba_shift == 0 && mask == 0xFF) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
} else {
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
// Format was OK if there weren't bits from another component.
formatOK = mask <= 255 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_4444:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const char *rgba = "rrrrggggbbbbaaaa";
const u8 rgba_shift = shift & 3;
if (rgba_shift == 0 && mask == 0xF) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 15.0f / 256.0f;
} else {
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 15 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_565:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const u8 shifts[16] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4};
const int multipliers[16] = {31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31};
const char *rgba = "rrrrrggggggbbbbb";
const u8 rgba_shift = shifts[shift];
if (rgba_shift == 0 && mask == multipliers[shift]) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = multipliers[shift] / 256.0f;
} else {
// We just need to divide the right component by the right value, and then mod against the mask.
// A common case is shift=1, mask=0f.
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
case GE_FORMAT_5551:
if ((mask & (mask + 1)) == 0 && shift < 16) {
const char *rgba = "rrrrrgggggbbbbba";
const u8 rgba_shift = shift % 5;
if (rgba_shift == 0 && mask == 0x1F) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 31.0f / 256.0f;
} else if (shift == 15 && mask == 1) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
index_multiplier = 1.0f / 256.0f;
} else {
// A isn't possible here.
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 31 - (1 << rgba_shift);
}
} else {
formatOK = false;
}
break;
default:
break;
}
float texturePixels = 256.f;
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
index_multiplier *= 0.5f;
}
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
// index_multiplier -= 0.01f / texturePixels;
if (!formatOK) {
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
}
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
sprintf(offset, " + %f", texel_offset);
#ifdef USING_GLES2
WRITE(p, "#version 100\n");
WRITE(p, "precision mediump float;\n");
#else
WRITE(p, "#version 110\n");
#endif
WRITE(p, "varying vec2 v_texcoord0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n");
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
WRITE(p, "}\n");
}
#undef WRITE
u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) {
return (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24);
}
@ -394,11 +185,7 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) {
char *buffer = new char[2048];
if (useGL3_) {
GenerateDepalShader300(buffer, pixelFormat);
} else {
GenerateDepalShader100(buffer, pixelFormat);
}
GenerateDepalShader(buffer, pixelFormat, useGL3_ ? GLSL_300 : GLSL_140);
GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER);

View File

@ -181,6 +181,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
<ClInclude Include="Common\DrawEngineCommon.h" />
<ClInclude Include="Common\FramebufferCommon.h" />
<ClInclude Include="Common\GPUDebugInterface.h" />
@ -199,6 +200,7 @@
<ClInclude Include="Common\VertexDecoderCommon.h" />
<ClInclude Include="Debugger\Breakpoints.h" />
<ClInclude Include="Debugger\Stepping.h" />
<ClInclude Include="Directx9\DepalettizeShaderDX9.h" />
<ClInclude Include="Directx9\GPU_DX9.h" />
<ClInclude Include="Directx9\helper\dx_state.h" />
<ClInclude Include="Directx9\helper\fbo.h" />
@ -239,6 +241,7 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
<ClCompile Include="Common\DrawEngineCommon.cpp" />
<ClCompile Include="Common\FramebufferCommon.cpp" />
<ClCompile Include="Common\IndexGenerator.cpp" />
@ -263,6 +266,7 @@
<ClCompile Include="Common\VertexDecoderX86.cpp" />
<ClCompile Include="Debugger\Breakpoints.cpp" />
<ClCompile Include="Debugger\Stepping.cpp" />
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp" />
<ClCompile Include="Directx9\GPU_DX9.cpp" />
<ClCompile Include="Directx9\helper\dx_state.cpp" />
<ClCompile Include="Directx9\helper\fbo.cpp" />

View File

@ -177,6 +177,12 @@
<ClInclude Include="Common\DrawEngineCommon.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="Common\DepalettizeShaderCommon.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="Directx9\DepalettizeShaderDX9.h">
<Filter>DirectX9</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -338,6 +344,12 @@
<ClCompile Include="Directx9\StencilBufferDX9.cpp">
<Filter>DirectX9</Filter>
</ClCompile>
<ClCompile Include="Common\DepalettizeShaderCommon.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp">
<Filter>DirectX9</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View File

@ -41,6 +41,7 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU
$$P/GPU/GLES/VertexShaderGenerator.cpp \
$$P/GPU/Software/*.cpp \
$$P/GPU/Debugger/*.cpp \
$$P/GPU/Common/DepalettizeShaderCommon.cpp \
$$P/GPU/Common/IndexGenerator.cpp \
$$P/GPU/Common/TextureDecoder.cpp \
$$P/GPU/Common/VertexDecoderCommon.cpp \

View File

@ -148,6 +148,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/GPU/GPUCommon.cpp \
$(SRC)/GPU/GPUState.cpp \
$(SRC)/GPU/GeDisasm.cpp \
$(SRC)/GPU/Common/DepalettizeShaderCommon.cpp \
$(SRC)/GPU/Common/FramebufferCommon.cpp \
$(SRC)/GPU/Common/IndexGenerator.cpp.arm \
$(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \