mirror of
https://github.com/libretro/ppsspp.git
synced 2025-03-03 14:09:45 +00:00
Merge pull request #6920 from hrydgard/dx9-depal
Dx9 depal - needs debugging before merge
This commit is contained in:
commit
c0450f7f9e
@ -1378,6 +1378,8 @@ if(ARMV7)
|
||||
set(GPU_NEON GPU/Common/TextureDecoderNEON.cpp)
|
||||
endif()
|
||||
add_library(GPU OBJECT
|
||||
GPU/Common/DepalettizeShaderCommon.cpp
|
||||
GPU/Common/DepalettizeShaderCommon.h
|
||||
GPU/Common/FramebufferCommon.cpp
|
||||
GPU/Common/FramebufferCommon.h
|
||||
GPU/Common/GPUDebugInterface.h
|
||||
|
259
GPU/Common/DepalettizeShaderCommon.cpp
Normal file
259
GPU/Common/DepalettizeShaderCommon.cpp
Normal file
@ -0,0 +1,259 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "Common/Log.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
|
||||
#define WRITE p+=sprintf
|
||||
|
||||
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
|
||||
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
|
||||
char *p = buffer;
|
||||
#ifdef USING_GLES2
|
||||
WRITE(p, "#version 300 es\n");
|
||||
WRITE(p, "precision mediump float;\n");
|
||||
#else
|
||||
WRITE(p, "#version 330\n");
|
||||
#endif
|
||||
WRITE(p, "in vec2 v_texcoord0;\n");
|
||||
WRITE(p, "out vec4 fragColor0;\n");
|
||||
WRITE(p, "uniform sampler2D tex;\n");
|
||||
WRITE(p, "uniform sampler2D pal;\n");
|
||||
|
||||
WRITE(p, "void main() {\n");
|
||||
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
|
||||
|
||||
int mask = gstate.getClutIndexMask();
|
||||
int shift = gstate.getClutIndexShift();
|
||||
int offset = gstate.getClutIndexStartPos();
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
|
||||
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
|
||||
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
|
||||
// Use the mask to skip reading some components.
|
||||
int shiftedMask = mask << shift;
|
||||
switch (pixelFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_565:
|
||||
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256;
|
||||
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
|
||||
texturePixels = 512;
|
||||
|
||||
if (shift) {
|
||||
WRITE(p, " index = ((index >> %i) & 0x%02x)", shift, mask);
|
||||
} else {
|
||||
WRITE(p, " index = (index & 0x%02x)", mask);
|
||||
}
|
||||
if (offset) {
|
||||
WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h
|
||||
} else {
|
||||
WRITE(p, ";\n");
|
||||
}
|
||||
|
||||
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
// FP only, to suit GL(ES) 2.0
|
||||
void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage lang) {
|
||||
char *p = buffer;
|
||||
|
||||
const char *modFunc = lang == HLSL_DX9 ? "fmod" : "mod";
|
||||
|
||||
char lookupMethod[128] = "index.r";
|
||||
char offset[128] = "";
|
||||
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
const u32 clutBase = gstate.getClutIndexStartPos();
|
||||
|
||||
const int shift = gstate.getClutIndexShift();
|
||||
const int mask = gstate.getClutIndexMask();
|
||||
|
||||
float index_multiplier = 1.0f;
|
||||
// pixelformat is the format of the texture we are sampling.
|
||||
bool formatOK = true;
|
||||
switch (pixelFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if ((mask & (mask + 1)) == 0) {
|
||||
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
|
||||
const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
|
||||
const u8 rgba_shift = shift & 7;
|
||||
if (rgba_shift == 0 && mask == 0xFF) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
} else {
|
||||
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
// Format was OK if there weren't bits from another component.
|
||||
formatOK = mask <= 255 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const char *rgba = "rrrrggggbbbbaaaa";
|
||||
const u8 rgba_shift = shift & 3;
|
||||
if (rgba_shift == 0 && mask == 0xF) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 15.0f / 256.0f;
|
||||
} else {
|
||||
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
|
||||
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= 15 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_565:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const u8 shifts[16] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4 };
|
||||
const int multipliers[16] = { 31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31 };
|
||||
const char *rgba = "rrrrrggggggbbbbb";
|
||||
const u8 rgba_shift = shifts[shift];
|
||||
if (rgba_shift == 0 && mask == multipliers[shift]) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = multipliers[shift] / 256.0f;
|
||||
} else {
|
||||
// We just need to divide the right component by the right value, and then mod against the mask.
|
||||
// A common case is shift=1, mask=0f.
|
||||
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const char *rgba = "rrrrrgggggbbbbba";
|
||||
const u8 rgba_shift = shift % 5;
|
||||
if (rgba_shift == 0 && mask == 0x1F) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 31.0f / 256.0f;
|
||||
} else if (shift == 15 && mask == 1) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
} else {
|
||||
// A isn't possible here.
|
||||
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= 31 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256.f;
|
||||
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
texturePixels = 512.f;
|
||||
index_multiplier *= 0.5f;
|
||||
}
|
||||
|
||||
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
|
||||
// index_multiplier -= 0.01f / texturePixels;
|
||||
|
||||
if (!formatOK) {
|
||||
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
|
||||
}
|
||||
|
||||
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
|
||||
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
|
||||
sprintf(offset, " + %f", texel_offset);
|
||||
|
||||
if (lang == GLSL_140) {
|
||||
#ifdef USING_GLES2
|
||||
WRITE(p, "#version 100\n");
|
||||
WRITE(p, "precision mediump float;\n");
|
||||
#else
|
||||
WRITE(p, "#version 110\n");
|
||||
#endif
|
||||
WRITE(p, "varying vec2 v_texcoord0;\n");
|
||||
WRITE(p, "uniform sampler2D tex;\n");
|
||||
WRITE(p, "uniform sampler2D pal;\n");
|
||||
WRITE(p, "void main() {\n");
|
||||
WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n");
|
||||
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
|
||||
WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
|
||||
WRITE(p, "}\n");
|
||||
} else if (lang == HLSL_DX9) {
|
||||
WRITE(p, "sampler tex: register(s0);\n");
|
||||
WRITE(p, "sampler pal: register(s1);\n");
|
||||
WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n");
|
||||
WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n");
|
||||
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
|
||||
WRITE(p, " return tex2D(pal, float2(coord, 0.0)).bgra;\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
}
|
||||
|
||||
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
|
||||
switch (language) {
|
||||
case GLSL_140:
|
||||
GenerateDepalShaderFloat(buffer, pixelFormat, language);
|
||||
break;
|
||||
case GLSL_300:
|
||||
GenerateDepalShader300(buffer, pixelFormat);
|
||||
break;
|
||||
case HLSL_DX9:
|
||||
GenerateDepalShaderFloat(buffer, pixelFormat, language);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef WRITE
|
28
GPU/Common/DepalettizeShaderCommon.h
Normal file
28
GPU/Common/DepalettizeShaderCommon.h
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPU/ge_constants.h"
|
||||
|
||||
// Output dialects understood by GenerateDepalShader().
enum ShaderLanguage {
	GLSL_140 = 0,  // Float-only GLSL path.
	GLSL_300 = 1,  // Integer GLSL path.
	HLSL_DX9 = 2,  // Float-only HLSL path.
};
|
||||
|
||||
// Writes the source of a depalettize shader for `language` into `buffer`.
// `pixelFormat` is the format of the texture being sampled. No buffer size is
// passed, so the caller must supply a buffer large enough for the generated text.
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language);
|
175
GPU/Directx9/DepalettizeShaderDX9.cpp
Normal file
175
GPU/Directx9/DepalettizeShaderDX9.cpp
Normal file
@ -0,0 +1,175 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Directx9/TextureCacheDX9.h"
|
||||
#include "GPU/Directx9/DepalettizeShaderDX9.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
#include "GPU/Directx9/helper/global.h"
|
||||
|
||||
namespace DX9 {
|
||||
|
||||
// Palette textures not used for more than this many flips are released by Decimate().
static const int DEPAL_TEXTURE_OLD_AGE = 120;

#if defined(_WIN32)
#define SHADERLOG
#endif

// Pass-through vertex shader used for the depalettize draw: forwards the
// position (with w = 1.0) and the texture coordinate unchanged.
static const char *depalVShaderHLSL =
	// Input layout.
	"struct VS_IN {\n"
	" float3 a_position : POSITION;\n"
	" float2 a_texcoord0 : TEXCOORD0;\n"
	"};\n"
	// Output layout.
	"struct VS_OUT {\n"
	" float4 Position : POSITION;\n"
	" float2 Texcoord : TEXCOORD0;\n"
	"};\n"
	// Entry point.
	"VS_OUT main(VS_IN input) {\n"
	" VS_OUT output;\n"
	" output.Texcoord = input.a_texcoord0;\n"
	" output.Position = float4(input.a_position, 1.0);\n"
	" return output;\n"
	"}\n";
|
||||
|
||||
// Compiles the shared pass-through vertex shader. On failure the error is
// logged and vertexShader_ keeps whatever CompileVertexShader left in it
// (it starts out as nullptr).
DepalShaderCacheDX9::DepalShaderCacheDX9() : vertexShader_(nullptr) {
	std::string compileError;
	const bool compiled = DX9::CompileVertexShader(depalVShaderHLSL, &vertexShader_, nullptr, compileError);
	if (compiled) {
		return;
	}
	ERROR_LOG(G3D, "error compling depal vshader: %s", compileError.c_str());
}
|
||||
|
||||
// Releases every cached shader and palette texture, then the vertex shader if one was created.
DepalShaderCacheDX9::~DepalShaderCacheDX9() {
	Clear();
	if (vertexShader_ != nullptr) {
		vertexShader_->Release();
	}
}
|
||||
|
||||
// Builds the shader cache key: the low 24 bits of gstate.clutformat combined
// with the pixel format in the top 8 bits.
u32 DepalShaderCacheDX9::GenerateShaderID(GEBufferFormat pixelFormat) {
	const u32 id = (gstate.clutformat & 0xFFFFFF) | (pixelFormat << 24);
	return id;
}
|
||||
|
||||
// Returns the palette texture for the given CLUT, creating and caching it on first use.
//
// clutID  - identifier of the CLUT contents; it is XORed with the current palette
//           format (read from gstate) to form the cache key.
// rawClut - the CLUT data; 1024 bytes are copied from it when a texture is created.
//
// Returns nullptr if the texture could not be created or locked. Nothing is cached
// and nothing is leaked in that case. A cache hit refreshes the entry's lastFrame.
LPDIRECT3DTEXTURE9 DepalShaderCacheDX9::GetClutTexture(const u32 clutID, u32 *rawClut) {
	GEPaletteFormat palFormat = gstate.getClutPaletteFormat();
	const u32 realClutID = clutID ^ palFormat;

	auto oldtex = texCache_.find(realClutID);
	if (oldtex != texCache_.end()) {
		oldtex->second->lastFrame = gpuStats.numFlips;
		return oldtex->second->texture;
	}

	D3DFORMAT dstFmt = DX9::getClutDestFormat(palFormat);
	int texturePixels = palFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;

	// Create texture
	D3DPOOL pool = D3DPOOL_MANAGED;
	int usage = 0;
	if (pD3DdeviceEx) {
		pool = D3DPOOL_DEFAULT;
		usage = D3DUSAGE_DYNAMIC;  // TODO: Switch to using a staging texture?
	}

	// Work on a local handle and allocate the cache entry only once everything has
	// succeeded, so the failure paths have nothing to leak besides the texture itself.
	LPDIRECT3DTEXTURE9 texture = nullptr;
	HRESULT hr = pD3Ddevice->CreateTexture(texturePixels, 1, 1, usage, (D3DFORMAT)D3DFMT(dstFmt), pool, &texture, NULL);
	if (FAILED(hr)) {
		ERROR_LOG(G3D, "Failed to create D3D texture for depal");
		return nullptr;
	}

	D3DLOCKED_RECT rect;
	hr = texture->LockRect(0, &rect, NULL, 0);
	if (FAILED(hr)) {
		ERROR_LOG(G3D, "Failed to lock D3D texture for depal");
		texture->Release();
		return nullptr;
	}
	// Regardless of format, the CLUT should always be 1024 bytes.
	memcpy(rect.pBits, rawClut, 1024);
	texture->UnlockRect(0);

	pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT);
	pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT);

	DepalTextureDX9 *tex = new DepalTextureDX9();
	tex->texture = texture;
	tex->lastFrame = gpuStats.numFlips;
	texCache_[realClutID] = tex;
	return tex->texture;
}
|
||||
|
||||
void DepalShaderCacheDX9::Clear() {
|
||||
for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) {
|
||||
shader->second->pixelShader->Release();
|
||||
delete shader->second;
|
||||
}
|
||||
cache_.clear();
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
}
|
||||
texCache_.clear();
|
||||
}
|
||||
|
||||
void DepalShaderCacheDX9::Decimate() {
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end();) {
|
||||
if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
texCache_.erase(tex++);
|
||||
} else {
|
||||
++tex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the depalettize pixel shader for `pixelFormat` under the current CLUT
// format, generating and compiling it on a cache miss. Returns nullptr (and
// caches nothing) when compilation fails.
LPDIRECT3DPIXELSHADER9 DepalShaderCacheDX9::GetDepalettizePixelShader(GEBufferFormat pixelFormat) {
	const u32 id = GenerateShaderID(pixelFormat);

	auto cached = cache_.find(id);
	if (cached != cache_.end()) {
		return cached->second->pixelShader;
	}

	// Scratch space for the generated HLSL source.
	char *source = new char[2048];
	GenerateDepalShader(source, pixelFormat, HLSL_DX9);

	LPDIRECT3DPIXELSHADER9 pshader;
	std::string errorMessage;
	const bool compiled = CompilePixelShader(source, &pshader, NULL, errorMessage);
	if (!compiled) {
		ERROR_LOG(G3D, "Failed to compile depal pixel shader: %s\n\n%s", source, errorMessage.c_str());
	}
	delete[] source;

	if (!compiled) {
		return nullptr;
	}

	DepalShaderDX9 *depal = new DepalShaderDX9();
	depal->pixelShader = pshader;
	cache_[id] = depal;
	return depal->pixelShader;
}
|
||||
|
||||
} // namespace
|
59
GPU/Directx9/DepalettizeShaderDX9.h
Normal file
59
GPU/Directx9/DepalettizeShaderDX9.h
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once

#include <map>

#include "Common/CommonTypes.h"
#include "GPU/ge_constants.h"

#include "GPU/Directx9/helper/global.h"

namespace DX9 {

// Cache entry holding one compiled depalettize pixel shader.
class DepalShaderDX9 {
public:
	LPDIRECT3DPIXELSHADER9 pixelShader;
};

// Cache entry holding one palette (CLUT) texture.
class DepalTextureDX9 {
public:
	LPDIRECT3DTEXTURE9 texture;
	// Value of gpuStats.numFlips when the texture was last requested; used by Decimate().
	int lastFrame;
};

// Caches both shaders and palette textures.
class DepalShaderCacheDX9 {
public:
	DepalShaderCacheDX9();
	~DepalShaderCacheDX9();

	// Returns the cached pixel shader for pixelFormat and the current CLUT format,
	// compiling it on first use. Returns nullptr if compilation fails.
	LPDIRECT3DPIXELSHADER9 GetDepalettizePixelShader(GEBufferFormat pixelFormat);
	// Returns the shared pass-through vertex shader (may be null if it failed to compile).
	LPDIRECT3DVERTEXSHADER9 GetDepalettizeVertexShader() { return vertexShader_; }
	// Returns the palette texture for the given CLUT, creating it from rawClut on
	// first use. Returns nullptr on failure.
	LPDIRECT3DTEXTURE9 GetClutTexture(const u32 clutHash, u32 *rawClut);
	// Releases every cached shader and texture.
	void Clear();
	// Releases palette textures that have not been requested recently.
	void Decimate();

private:
	u32 GenerateShaderID(GEBufferFormat pixelFormat);

	LPDIRECT3DVERTEXSHADER9 vertexShader_;
	std::map<u32, DepalShaderDX9 *> cache_;      // Keyed by GenerateShaderID().
	std::map<u32, DepalTextureDX9 *> texCache_;  // Keyed by CLUT id XOR palette format.
};

}  // namespace
|
@ -63,33 +63,33 @@ namespace DX9 {
|
||||
void CenterRect(float *x, float *y, float *w, float *h,
|
||||
float origW, float origH, float frameW, float frameH) {
|
||||
if (g_Config.bStretchToDisplay) {
|
||||
*x = 0;
|
||||
*y = 0;
|
||||
*w = frameW;
|
||||
*h = frameH;
|
||||
return;
|
||||
*x = 0;
|
||||
*y = 0;
|
||||
*w = frameW;
|
||||
*h = frameH;
|
||||
return;
|
||||
}
|
||||
|
||||
float origRatio = origW/origH;
|
||||
float frameRatio = frameW/frameH;
|
||||
|
||||
if (origRatio > frameRatio) {
|
||||
// Image is wider than frame. Center vertically.
|
||||
float scale = origW / frameW;
|
||||
*x = 0.0f;
|
||||
*w = frameW;
|
||||
*h = frameW / origRatio;
|
||||
// Stretch a little bit
|
||||
if (g_Config.bPartialStretch)
|
||||
*h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564
|
||||
*y = (frameH - *h) / 2.0f;
|
||||
// Image is wider than frame. Center vertically.
|
||||
float scale = origW / frameW;
|
||||
*x = 0.0f;
|
||||
*w = frameW;
|
||||
*h = frameW / origRatio;
|
||||
// Stretch a little bit
|
||||
if (g_Config.bPartialStretch)
|
||||
*h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564
|
||||
*y = (frameH - *h) / 2.0f;
|
||||
} else {
|
||||
// Image is taller than frame. Center horizontally.
|
||||
float scale = origH / frameH;
|
||||
*y = 0.0f;
|
||||
*h = frameH;
|
||||
*w = frameH * origRatio;
|
||||
*x = (frameW - *w) / 2.0f;
|
||||
// Image is taller than frame. Center horizontally.
|
||||
float scale = origH / frameH;
|
||||
*y = 0.0f;
|
||||
*h = frameH;
|
||||
*w = frameH * origRatio;
|
||||
*x = (frameW - *w) / 2.0f;
|
||||
}
|
||||
}
|
||||
|
||||
@ -202,8 +202,6 @@ namespace DX9 {
|
||||
convBuf = (u8*)rect.pBits;
|
||||
|
||||
// Final format is BGRA(directx)
|
||||
|
||||
// TODO: We can just change the texture format and flip some bits around instead of this.
|
||||
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
switch (srcPixelFormat) {
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
|
||||
void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst);
|
||||
|
||||
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy);
|
||||
void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy = false);
|
||||
|
||||
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
|
||||
|
||||
|
@ -403,6 +403,7 @@ DIRECTX9_GPU::DIRECTX9_GPU()
|
||||
framebufferManager_.SetShaderManager(shaderManager_);
|
||||
framebufferManager_.SetTransformDrawEngine(&transformDraw_);
|
||||
textureCache_.SetFramebufferManager(&framebufferManager_);
|
||||
textureCache_.SetDepalShaderCache(&depalShaderCache_);
|
||||
textureCache_.SetShaderManager(shaderManager_);
|
||||
|
||||
// Sanity check gstate
|
||||
@ -525,7 +526,7 @@ void DIRECTX9_GPU::BeginFrameInternal() {
|
||||
|
||||
textureCache_.StartFrame();
|
||||
transformDraw_.DecimateTrackedVertexArrays();
|
||||
// depalShaderCache_.Decimate();
|
||||
depalShaderCache_.Decimate();
|
||||
// fragmentTestCache_.Decimate();
|
||||
|
||||
if (dumpNextFrame_) {
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "GPU/Directx9/FramebufferDX9.h"
|
||||
#include "GPU/Directx9/TransformPipelineDX9.h"
|
||||
#include "GPU/Directx9/TextureCacheDX9.h"
|
||||
#include "GPU/Directx9/DepalettizeShaderDX9.h"
|
||||
#include "GPU/Directx9/helper/fbo.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
@ -167,6 +168,7 @@ private:
|
||||
|
||||
FramebufferManagerDX9 framebufferManager_;
|
||||
TextureCacheDX9 textureCache_;
|
||||
DepalShaderCacheDX9 depalShaderCache_;
|
||||
TransformDrawEngineDX9 transformDraw_;
|
||||
ShaderManagerDX9 *shaderManager_;
|
||||
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
|
||||
#include "GPU/Directx9/TextureCacheDX9.h"
|
||||
#include "GPU/Directx9/FramebufferDX9.h"
|
||||
#include "GPU/Directx9/ShaderManagerDX9.h"
|
||||
#include "GPU/Directx9/DepalettizeShaderDX9.h"
|
||||
#include "GPU/Directx9/helper/dx_state.h"
|
||||
#include "GPU/Common/FramebufferCommon.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
@ -897,21 +899,84 @@ void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebu
|
||||
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
|
||||
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
|
||||
if (useBufferedRendering) {
|
||||
// TODO: Depal
|
||||
// For now, let's not bind FBOs that we know are off (invalidHint will be -1.)
|
||||
// But let's still not use random memory.
|
||||
if (entry->framebuffer->fbo) {
|
||||
fbo_bind_color_as_texture(entry->framebuffer->fbo, 0);
|
||||
// Keep the framebuffer alive.
|
||||
// TODO: Dangerous if it sets a new one?
|
||||
entry->framebuffer->last_frame_used = gpuStats.numFlips;
|
||||
} else {
|
||||
pD3Ddevice->SetTexture(0, NULL);
|
||||
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
|
||||
LPDIRECT3DPIXELSHADER9 pshader = nullptr;
|
||||
if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) {
|
||||
pshader = depalShaderCache_->GetDepalettizePixelShader(framebuffer->drawnFormat);
|
||||
}
|
||||
|
||||
gstate_c.textureFullAlpha = gstate.getTextureFormat() == GE_TFMT_5650;
|
||||
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
|
||||
if (pshader) {
|
||||
LPDIRECT3DTEXTURE9 clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBuf_);
|
||||
|
||||
FBO *depalFBO = framebufferManager_->GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, FBO_8888);
|
||||
fbo_bind_as_render_target(depalFBO);
|
||||
|
||||
float xoff = -0.5f / framebuffer->renderWidth;
|
||||
float yoff = 0.5f / framebuffer->renderHeight;
|
||||
|
||||
const float pos[12 + 8] = {
|
||||
-1 + xoff, 1 + yoff, 0, 0, 0,
|
||||
1 + xoff, 1 + yoff, 0, 1, 0,
|
||||
1 + xoff, -1 + yoff, 0, 1, 1,
|
||||
-1 + xoff, -1 + yoff, 0, 0, 1,
|
||||
};
|
||||
|
||||
shaderManager_->DirtyLastShader();
|
||||
|
||||
pD3Ddevice->SetPixelShader(pshader);
|
||||
pD3Ddevice->SetVertexShader(depalShaderCache_->GetDepalettizeVertexShader());
|
||||
pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl);
|
||||
pD3Ddevice->SetTexture(1, clutTexture);
|
||||
pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT);
|
||||
pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
|
||||
pD3Ddevice->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE);
|
||||
|
||||
framebufferManager_->BindFramebufferColor(0, framebuffer, true);
|
||||
pD3Ddevice->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
|
||||
pD3Ddevice->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
|
||||
pD3Ddevice->SetSamplerState(0, D3DSAMP_MIPFILTER, D3DTEXF_NONE);
|
||||
|
||||
pD3Ddevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
|
||||
pD3Ddevice->SetRenderState(D3DRS_SEPARATEALPHABLENDENABLE, FALSE);
|
||||
pD3Ddevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA);
|
||||
pD3Ddevice->SetRenderState(D3DRS_ZENABLE, FALSE);
|
||||
pD3Ddevice->SetRenderState(D3DRS_STENCILENABLE, FALSE);
|
||||
pD3Ddevice->SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE);
|
||||
pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
|
||||
|
||||
D3DVIEWPORT9 vp;
|
||||
vp.MinZ = 0;
|
||||
vp.MaxZ = 1;
|
||||
vp.X = 0;
|
||||
vp.Y = 0;
|
||||
vp.Width = framebuffer->renderWidth;
|
||||
vp.Height = framebuffer->renderHeight;
|
||||
pD3Ddevice->SetViewport(&vp);
|
||||
|
||||
HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, pos, (3 + 2) * sizeof(float));
|
||||
if (FAILED(hr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Depal render failed: %08x", hr);
|
||||
}
|
||||
|
||||
framebufferManager_->RebindFramebuffer();
|
||||
fbo_bind_color_as_texture(depalFBO, 0);
|
||||
dxstate.Restore();
|
||||
dxstate.viewport.restore();
|
||||
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
const u32 clutBase = gstate.getClutIndexStartPos();
|
||||
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
|
||||
const u32 clutExtendedColors = (clutTotalBytes_ / bytesPerColor) + clutBase;
|
||||
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutExtendedColors, clutExtendedColors, 1);
|
||||
gstate_c.textureFullAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL;
|
||||
gstate_c.textureSimpleAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE;
|
||||
} else {
|
||||
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
|
||||
framebufferManager_->BindFramebufferColor(0, framebuffer);
|
||||
|
||||
gstate_c.textureFullAlpha = gstate.getTextureFormat() == GE_TFMT_5650;
|
||||
gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha;
|
||||
}
|
||||
|
||||
// Keep the framebuffer alive.
|
||||
framebuffer->last_frame_used = gpuStats.numFlips;
|
||||
|
@ -32,6 +32,7 @@ struct VirtualFramebuffer;
|
||||
namespace DX9 {
|
||||
|
||||
class FramebufferManagerDX9;
|
||||
class DepalShaderCacheDX9;
|
||||
class ShaderManagerDX9;
|
||||
|
||||
enum TextureFiltering {
|
||||
@ -69,6 +70,9 @@ public:
|
||||
// Stores the framebuffer manager pointer for later use.
void SetFramebufferManager(FramebufferManagerDX9 *fbManager) { framebufferManager_ = fbManager; }
|
||||
// Stores the depalettize shader cache pointer for later use.
void SetDepalShaderCache(DepalShaderCacheDX9 *dpCache) { depalShaderCache_ = dpCache; }
|
||||
// Stores the shader manager pointer for later use.
void SetShaderManager(ShaderManagerDX9 *sm) { shaderManager_ = sm; }
|
||||
@ -223,6 +227,7 @@ private:
|
||||
int timesInvalidatedAllThisFrame_;
|
||||
|
||||
FramebufferManagerDX9 *framebufferManager_;
|
||||
DepalShaderCacheDX9 *depalShaderCache_;
|
||||
ShaderManagerDX9 *shaderManager_;
|
||||
};
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "DepalettizeShader.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/GLES/TextureCache.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
static const int DEPAL_TEXTURE_OLD_AGE = 120;
|
||||
|
||||
@ -107,216 +108,6 @@ DepalShaderCache::~DepalShaderCache() {
|
||||
glDeleteShader(vertexShader_);
|
||||
}
|
||||
|
||||
#define WRITE p+=sprintf
|
||||
|
||||
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
|
||||
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
|
||||
char *p = buffer;
|
||||
#ifdef USING_GLES2
|
||||
WRITE(p, "#version 300 es\n");
|
||||
WRITE(p, "precision mediump float;\n");
|
||||
#else
|
||||
WRITE(p, "#version 330\n");
|
||||
#endif
|
||||
WRITE(p, "in vec2 v_texcoord0;\n");
|
||||
WRITE(p, "out vec4 fragColor0;\n");
|
||||
WRITE(p, "uniform sampler2D tex;\n");
|
||||
WRITE(p, "uniform sampler2D pal;\n");
|
||||
|
||||
WRITE(p, "void main() {\n");
|
||||
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
|
||||
|
||||
int mask = gstate.getClutIndexMask();
|
||||
int shift = gstate.getClutIndexShift();
|
||||
int offset = gstate.getClutIndexStartPos();
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able
|
||||
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
|
||||
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
|
||||
// Use the mask to skip reading some components.
|
||||
int shiftedMask = mask << shift;
|
||||
switch (pixelFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_565:
|
||||
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n");
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
|
||||
if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n");
|
||||
if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
|
||||
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
|
||||
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256;
|
||||
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
|
||||
texturePixels = 512;
|
||||
|
||||
if (shift) {
|
||||
WRITE(p, " index = ((index >> %i) & 0x%02x)", shift, mask);
|
||||
} else {
|
||||
WRITE(p, " index = (index & 0x%02x)", mask);
|
||||
}
|
||||
if (offset) {
|
||||
WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h
|
||||
} else {
|
||||
WRITE(p, ";\n");
|
||||
}
|
||||
|
||||
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
// FP only, to suit GL(ES) 2.0
|
||||
void GenerateDepalShader100(char *buffer, GEBufferFormat pixelFormat) {
|
||||
char *p = buffer;
|
||||
|
||||
char lookupMethod[128] = "index.r";
|
||||
char offset[128] = "";
|
||||
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
const u32 clutBase = gstate.getClutIndexStartPos();
|
||||
|
||||
const int shift = gstate.getClutIndexShift();
|
||||
const int mask = gstate.getClutIndexMask();
|
||||
|
||||
float index_multiplier = 1.0f;
|
||||
// pixelformat is the format of the texture we are sampling.
|
||||
bool formatOK = true;
|
||||
switch (pixelFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if ((mask & (mask + 1)) == 0) {
|
||||
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
|
||||
const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
|
||||
const u8 rgba_shift = shift & 7;
|
||||
if (rgba_shift == 0 && mask == 0xFF) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
} else {
|
||||
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
// Format was OK if there weren't bits from another component.
|
||||
formatOK = mask <= 255 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const char *rgba = "rrrrggggbbbbaaaa";
|
||||
const u8 rgba_shift = shift & 3;
|
||||
if (rgba_shift == 0 && mask == 0xF) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 15.0f / 256.0f;
|
||||
} else {
|
||||
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
|
||||
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= 15 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_565:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const u8 shifts[16] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4};
|
||||
const int multipliers[16] = {31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31};
|
||||
const char *rgba = "rrrrrggggggbbbbb";
|
||||
const u8 rgba_shift = shifts[shift];
|
||||
if (rgba_shift == 0 && mask == multipliers[shift]) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = multipliers[shift] / 256.0f;
|
||||
} else {
|
||||
// We just need to divide the right component by the right value, and then mod against the mask.
|
||||
// A common case is shift=1, mask=0f.
|
||||
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
if ((mask & (mask + 1)) == 0 && shift < 16) {
|
||||
const char *rgba = "rrrrrgggggbbbbba";
|
||||
const u8 rgba_shift = shift % 5;
|
||||
if (rgba_shift == 0 && mask == 0x1F) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 31.0f / 256.0f;
|
||||
} else if (shift == 15 && mask == 1) {
|
||||
sprintf(lookupMethod, "index.%c", rgba[shift]);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
} else {
|
||||
// A isn't possible here.
|
||||
sprintf(lookupMethod, "mod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
|
||||
index_multiplier = 1.0f / 256.0f;
|
||||
formatOK = mask <= 31 - (1 << rgba_shift);
|
||||
}
|
||||
} else {
|
||||
formatOK = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256.f;
|
||||
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
texturePixels = 512.f;
|
||||
index_multiplier *= 0.5f;
|
||||
}
|
||||
|
||||
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
|
||||
// index_multiplier -= 0.01f / texturePixels;
|
||||
|
||||
if (!formatOK) {
|
||||
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
|
||||
}
|
||||
|
||||
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
|
||||
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
|
||||
sprintf(offset, " + %f", texel_offset);
|
||||
|
||||
#ifdef USING_GLES2
|
||||
WRITE(p, "#version 100\n");
|
||||
WRITE(p, "precision mediump float;\n");
|
||||
#else
|
||||
WRITE(p, "#version 110\n");
|
||||
#endif
|
||||
WRITE(p, "varying vec2 v_texcoord0;\n");
|
||||
WRITE(p, "uniform sampler2D tex;\n");
|
||||
WRITE(p, "uniform sampler2D pal;\n");
|
||||
WRITE(p, "void main() {\n");
|
||||
WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n");
|
||||
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
|
||||
WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
#undef WRITE
|
||||
|
||||
|
||||
// Builds the cache key for a depal shader: the low 24 bits of gstate.clutformat,
// with pixelFormat placed in the bits above them.
u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) {
	const u32 clutBits = gstate.clutformat & 0xFFFFFF;
	const u32 formatBits = pixelFormat << 24;
	return clutBits | formatBits;
}
|
||||
@ -394,11 +185,7 @@ GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) {
|
||||
|
||||
char *buffer = new char[2048];
|
||||
|
||||
if (useGL3_) {
|
||||
GenerateDepalShader300(buffer, pixelFormat);
|
||||
} else {
|
||||
GenerateDepalShader100(buffer, pixelFormat);
|
||||
}
|
||||
GenerateDepalShader(buffer, pixelFormat, useGL3_ ? GLSL_300 : GLSL_140);
|
||||
|
||||
GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
|
||||
|
@ -181,6 +181,7 @@
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\ext\xbrz\xbrz.h" />
|
||||
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
|
||||
<ClInclude Include="Common\DrawEngineCommon.h" />
|
||||
<ClInclude Include="Common\FramebufferCommon.h" />
|
||||
<ClInclude Include="Common\GPUDebugInterface.h" />
|
||||
@ -199,6 +200,7 @@
|
||||
<ClInclude Include="Common\VertexDecoderCommon.h" />
|
||||
<ClInclude Include="Debugger\Breakpoints.h" />
|
||||
<ClInclude Include="Debugger\Stepping.h" />
|
||||
<ClInclude Include="Directx9\DepalettizeShaderDX9.h" />
|
||||
<ClInclude Include="Directx9\GPU_DX9.h" />
|
||||
<ClInclude Include="Directx9\helper\dx_state.h" />
|
||||
<ClInclude Include="Directx9\helper\fbo.h" />
|
||||
@ -239,6 +241,7 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
|
||||
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
|
||||
<ClCompile Include="Common\DrawEngineCommon.cpp" />
|
||||
<ClCompile Include="Common\FramebufferCommon.cpp" />
|
||||
<ClCompile Include="Common\IndexGenerator.cpp" />
|
||||
@ -263,6 +266,7 @@
|
||||
<ClCompile Include="Common\VertexDecoderX86.cpp" />
|
||||
<ClCompile Include="Debugger\Breakpoints.cpp" />
|
||||
<ClCompile Include="Debugger\Stepping.cpp" />
|
||||
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp" />
|
||||
<ClCompile Include="Directx9\GPU_DX9.cpp" />
|
||||
<ClCompile Include="Directx9\helper\dx_state.cpp" />
|
||||
<ClCompile Include="Directx9\helper\fbo.cpp" />
|
||||
|
@ -177,6 +177,12 @@
|
||||
<ClInclude Include="Common\DrawEngineCommon.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Common\DepalettizeShaderCommon.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Directx9\DepalettizeShaderDX9.h">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Math3D.cpp">
|
||||
@ -338,6 +344,12 @@
|
||||
<ClCompile Include="Directx9\StencilBufferDX9.cpp">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\DepalettizeShaderCommon.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="CMakeLists.txt" />
|
||||
|
@ -41,6 +41,7 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU
|
||||
$$P/GPU/GLES/VertexShaderGenerator.cpp \
|
||||
$$P/GPU/Software/*.cpp \
|
||||
$$P/GPU/Debugger/*.cpp \
|
||||
$$P/GPU/Common/DepalettizeShaderCommon.cpp \
|
||||
$$P/GPU/Common/IndexGenerator.cpp \
|
||||
$$P/GPU/Common/TextureDecoder.cpp \
|
||||
$$P/GPU/Common/VertexDecoderCommon.cpp \
|
||||
|
@ -148,6 +148,7 @@ EXEC_AND_LIB_FILES := \
|
||||
$(SRC)/GPU/GPUCommon.cpp \
|
||||
$(SRC)/GPU/GPUState.cpp \
|
||||
$(SRC)/GPU/GeDisasm.cpp \
|
||||
$(SRC)/GPU/Common/DepalettizeShaderCommon.cpp \
|
||||
$(SRC)/GPU/Common/FramebufferCommon.cpp \
|
||||
$(SRC)/GPU/Common/IndexGenerator.cpp.arm \
|
||||
$(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \
|
||||
|
Loading…
x
Reference in New Issue
Block a user