Initial work on depalettization.

This commit is contained in:
Henrik Rydgard 2014-03-29 21:58:38 +01:00
parent 585050de27
commit 8fba7fa98e
9 changed files with 405 additions and 17 deletions

View File

@ -0,0 +1,261 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "Common/Log.h"
#include "DepalettizeShader.h"
#include "GPU/GPUState.h"
#include "GPU/GLES/TextureCache.h"
// Pass-through vertex shader (GLSL ES 1.00) compiled once by the
// DepalShaderCache constructor. It copies a_position straight to gl_Position
// and forwards a_texcoord0 to the fragment stage through the v_texcoord0
// varying, so any fragment shader linked against it must declare a varying
// with that exact name.
static const char *depalVShader =
"#version 100\n"
"// Depal shader\n"
"attribute vec4 a_position;\n"
"attribute vec2 a_texcoord0;\n"
"varying vec2 v_texcoord0;\n"
"void main() {\n"
" v_texcoord0 = a_texcoord0;\n"
" gl_Position = a_position;\n"
"}\n";
// Reports whether the last glCompileShader() call on |shader| succeeded.
//
//   shader - GL shader object that was just compiled.
//   code   - the source text that was compiled; only used for logging.
//
// Returns true on success (the source is logged at debug level). On failure
// the driver's info log and the source are written to the error log and
// false is returned. The shader object itself is never modified or deleted;
// that is the caller's responsibility.
static bool CheckShaderCompileSuccess(GLuint shader, const char *code) {
	// GL does not touch the output when the query itself raises an error (for
	// example an invalid shader name), so start from "failed" rather than from
	// an indeterminate value.
	GLint success = GL_FALSE;
	glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
	if (!success) {
#define MAX_INFO_LOG_SIZE 2048
		GLchar infoLog[MAX_INFO_LOG_SIZE];
		// Same reasoning as above: len must have a defined value even if the
		// call below fails, because it is used as an index right afterwards.
		GLsizei len = 0;
		glGetShaderInfoLog(shader, MAX_INFO_LOG_SIZE, &len, infoLog);
		if (len < 0 || len >= MAX_INFO_LOG_SIZE) {
			len = MAX_INFO_LOG_SIZE - 1;
		}
		infoLog[len] = '\0';
#ifdef ANDROID
		ELOG("Error in shader compilation! %s\n", infoLog);
		ELOG("Shader source:\n%s\n", (const char *)code);
#endif
		ERROR_LOG(G3D, "Error in shader compilation!\n");
		ERROR_LOG(G3D, "Info log: %s\n", infoLog);
		ERROR_LOG(G3D, "Shader source:\n%s\n", (const char *)code);
#ifdef SHADERLOG
		OutputDebugStringUTF8(infoLog);
#endif
		return false;
	}

	DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code);
	return true;
}
// Builds the vertex shader that every depalettize program shares. The result
// of the compile check is currently only logged; nothing else reacts to it.
DepalShaderCache::DepalShaderCache() {
	const GLuint shaderName = glCreateShader(GL_VERTEX_SHADER);
	vertexShader_ = shaderName;

	glShaderSource(shaderName, 1, &depalVShader, 0);
	glCompileShader(shaderName);

	const bool compiledOk = CheckShaderCompileSuccess(shaderName, depalVShader);
	(void)compiledOk;  // No follow-up action is taken yet.
}
// Releases every cached program, fragment shader and palette texture through
// Clear(), then deletes the shared vertex shader created by the constructor.
DepalShaderCache::~DepalShaderCache() {
Clear();
glDeleteShader(vertexShader_);
}
// Writes the source of a GLSL ES 1.00 fragment shader into |buffer|. The
// shader samples "tex" at the interpolated coordinate, picks one channel of
// that sample as a palette index and outputs the matching texel of "pal".
//
//   buffer      - destination for the NUL-terminated shader text. No size is
//                 passed in; the only visible caller provides 2048 bytes.
//   pixelFormat - format of the texture being sampled; together with the
//                 current CLUT shift/mask in gstate it selects the channel.
//
// Only the cases where the CLUT mask covers a whole channel and the shift
// lands on a channel boundary are handled; everything else falls back to the
// red channel (the "Ugh" branches) or to the last channel of the format.
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat) {
	char *p = buffer;
#define WRITE p+=sprintf
	WRITE(p, "#version 100\n");
	// ES 1.00 fragment shaders have no default float precision; without this
	// line every float declaration below is a compile error.
	WRITE(p, "precision mediump float;\n");
	// Must match the varying name exported by depalVShader or linking fails.
	WRITE(p, "varying vec2 v_texcoord0;\n");
	WRITE(p, "uniform sampler2D tex;\n");
	WRITE(p, "uniform sampler2D pal;\n");
	WRITE(p, "void main() {\n");
	WRITE(p, "  vec4 index = texture2D(tex, v_texcoord0);\n");

	char lookupMethod[128] = "index.r";
	char offset[128] = "";

	const u32 clutBase = gstate.getClutIndexStartPos();
	const int shift = gstate.getClutIndexShift();
	const int mask = gstate.getClutIndexMask();

	// pixelformat is the format of the texture we are sampling.
	switch (pixelFormat) {
	case GE_FORMAT_8888:
		if (mask == 0xFF) {
			switch (shift) {  // bgra?
			case 0: strcpy(lookupMethod, "index.r"); break;
			case 8: strcpy(lookupMethod, "index.g"); break;
			case 16: strcpy(lookupMethod, "index.b"); break;
			default:
			case 24: strcpy(lookupMethod, "index.a"); break;
			}
		} else {
			// Ugh
		}
		break;
	case GE_FORMAT_4444:
		if ((mask & 0xF) == 0xF) {
			switch (shift) {  // bgra?
			case 0: strcpy(lookupMethod, "index.r"); break;
			case 4: strcpy(lookupMethod, "index.g"); break;
			case 8: strcpy(lookupMethod, "index.b"); break;
			default:
			case 12: strcpy(lookupMethod, "index.a"); break;
			}
		} else {
			// Ugh
		}
		break;
	case GE_FORMAT_565:
		if ((mask & 0x3f) == 0x3F) {
			switch (shift) {  // bgra?
			case 0: strcpy(lookupMethod, "index.r"); break;
			case 5: strcpy(lookupMethod, "index.g"); break;
			default:
			case 11: strcpy(lookupMethod, "index.b"); break;
			}
		} else {
			// Ugh
		}
		break;
	case GE_FORMAT_5551:
		if ((mask & 0x1F) == 0x1F) {
			// Channels are 5 bits wide here, so they start at bits 0, 5 and 10
			// (not 4 and 8 as in the 4444 layout); alpha is the single bit 15.
			switch (shift) {  // bgra?
			case 0: strcpy(lookupMethod, "index.r"); break;
			case 5: strcpy(lookupMethod, "index.g"); break;
			case 10: strcpy(lookupMethod, "index.b"); break;
			default:
			case 15: strcpy(lookupMethod, "index.a"); break;
			}
		} else {
			// Ugh
		}
		break;
	}

	if (clutBase != 0) {
		// Emit a real float literal: "%.0f" rounded the fraction to "0" or "1",
		// and GLSL ES has no implicit int -> float conversion.
		sprintf(offset, " + %f", (float)clutBase / 255.0f);  // 256?
	}

	WRITE(p, "  vec4 color = texture2D(pal, vec2(%s%s, 0.0));\n", lookupMethod, offset);
	// gl_FragColor is the fragment output in GLSL ES 1.00; gl_Color does not exist there.
	WRITE(p, "  gl_FragColor = color;\n");
	WRITE(p, "}\n");
#undef WRITE
}
// Builds the cache key for a depalettize program: the low 24 bits of the
// current CLUT format register with the framebuffer pixel format placed in
// the top byte.
u32 DepalShaderCache::GenerateShaderID(GEBufferFormat pixelFormat) {
	const u32 clutBits = gstate.clutformat & 0xFFFFFF;
	const u32 formatBits = pixelFormat << 24;
	return clutBits | formatBits;
}
// Returns the GL texture holding the palette identified by |clutID|, creating
// and caching it on first use.
//
//   clutID  - key identifying the palette contents; used only for the lookup.
//   rawClut - palette data uploaded when the texture has to be created. It is
//             read as 256 entries in the GL format that getClutDestFormat()
//             returns for the current CLUT palette format.
//
// A newly created texture is 256x1, clamped and point-sampled. It is created
// while texture unit 1 is active (the unit the "pal" sampler is pointed at)
// and unit 0 is made active again before returning.
GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) {
	auto oldtex = texCache_.find(clutID);
	if (oldtex != texCache_.end()) {
		return oldtex->second->texture;
	}

	GLuint dstFmt = getClutDestFormat(gstate.getClutPaletteFormat());

	DepalTexture *tex = new DepalTexture();
	glGenTextures(1, &tex->texture);

	// glActiveTexture takes GL_TEXTUREi enums, not unit indices. Passing a bare
	// 1 is GL_INVALID_ENUM, which left the previous unit active and made the
	// bind below replace whatever texture was bound there.
	glActiveTexture(GL_TEXTURE1);
	glBindTexture(GL_TEXTURE_2D, tex->texture);

	GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA;
	glTexImage2D(GL_TEXTURE_2D, 0, components, 256, 1, 0, components, dstFmt, (void *)rawClut);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

	glActiveTexture(GL_TEXTURE0);

	texCache_[clutID] = tex;
	return tex->texture;
}
// Deletes every cached program, fragment shader and palette texture together
// with their bookkeeping objects, and leaves both caches empty. Safe to call
// more than once (the destructor calls it as well).
void DepalShaderCache::Clear() {
	for (const auto &shader : cache_) {
		glDeleteShader(shader.second->fragShader);
		glDeleteProgram(shader.second->program);
		delete shader.second;
	}
	// Drop the now-dangling pointers; otherwise a second Clear() (or the
	// destructor) would delete them again and lookups would return dead entries.
	cache_.clear();

	for (const auto &tex : texCache_) {
		glDeleteTextures(1, &tex.second->texture);
		delete tex.second;
	}
	texCache_.clear();
}
// Not implemented yet: currently a no-op, so entries leave cache_ and
// texCache_ only through Clear().
void DepalShaderCache::Decimate() {
// TODO
}
// Returns the GL program that depalettizes a framebuffer of |pixelFormat|
// under the current CLUT state, building and caching it on first use.
//
// Returns 0 when the fragment shader fails to compile or the program fails to
// link. Failures are not cached, so the next call tries again.
//
// Side effect: when a new program is built it is left bound with
// glUseProgram(), with its "tex" sampler pointing at texture unit 0 and its
// "pal" sampler at unit 1. A cache hit does not touch GL state.
GLuint DepalShaderCache::GetDepalettizeShader(GEBufferFormat pixelFormat) {
	const u32 id = GenerateShaderID(pixelFormat);

	auto shader = cache_.find(id);
	if (shader != cache_.end()) {
		return shader->second->program;
	}

	char buffer[2048];
	GenerateDepalShader(buffer, pixelFormat);

	// Compile the generated source as a FRAGMENT shader. The shared vertex
	// shader was already compiled by the constructor and is only attached here.
	GLuint fragShader = glCreateShader(GL_FRAGMENT_SHADER);
	const char *fragSource = buffer;
	glShaderSource(fragShader, 1, &fragSource, 0);
	glCompileShader(fragShader);
	if (!CheckShaderCompileSuccess(fragShader, buffer)) {
		glDeleteShader(fragShader);
		return 0;
	}

	GLuint program = glCreateProgram();
	glAttachShader(program, vertexShader_);
	glAttachShader(program, fragShader);

	// Attribute locations have to be bound before linking to take effect.
	glBindAttribLocation(program, 0, "a_position");
	glBindAttribLocation(program, 1, "a_texcoord0");

	glLinkProgram(program);

	GLint linkStatus = GL_FALSE;
	glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
	if (linkStatus != GL_TRUE) {
		GLint bufLength = 0;
		glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength);
		if (bufLength) {
			char *buf = new char[bufLength];
			glGetProgramInfoLog(program, bufLength, NULL, buf);
			ERROR_LOG(G3D, "Could not link program:\n %s", buf);
			delete[] buf;
		}
		// Nothing will ever reference these again, so release them here.
		glDeleteProgram(program);
		glDeleteShader(fragShader);
		return 0;
	}

	// Only a successfully linked program may be made current and have its
	// uniforms set. Sampler uniforms must be set with the integer variant.
	glUseProgram(program);
	GLint u_tex = glGetUniformLocation(program, "tex");
	GLint u_pal = glGetUniformLocation(program, "pal");
	glUniform1i(u_tex, 0);
	glUniform1i(u_pal, 1);

	DepalShader *depal = new DepalShader();
	depal->program = program;
	depal->fragShader = fragShader;
	cache_[id] = depal;

	return depal->program;
}

View File

@ -0,0 +1,54 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "Common/CommonTypes.h"
#include "gfx_es2/gl_state.h"
#include "GPU/ge_constants.h"
// One cached depalettize program, as stored in DepalShaderCache::cache_.
class DepalShader {
public:
// Linked GL program object returned to callers of GetDepalettizeShader().
GLuint program;
// Fragment shader object attached to |program|; deleted together with it.
GLuint fragShader;
};
// One cached palette texture, as stored in DepalShaderCache::texCache_.
class DepalTexture {
public:
// GL texture name generated in DepalShaderCache::GetClutTexture().
GLuint texture;
};
// Caches both shaders and palette textures.
//
// The cache owns the GL objects it creates (one shared vertex shader, one
// program + fragment shader per shader ID, one texture per CLUT hash) and
// deletes them in Clear() / the destructor.
class DepalShaderCache {
public:
	DepalShaderCache();
	~DepalShaderCache();

	// A copy would hold the same GL names and heap pointers and free them a
	// second time on destruction, so copying is disabled.
	DepalShaderCache(const DepalShaderCache &) = delete;
	DepalShaderCache &operator=(const DepalShaderCache &) = delete;

	// Returns the program that depalettizes a framebuffer of the given pixel
	// format under the current CLUT state, building it on first use.
	// Returns 0 if no usable program could be built.
	GLuint GetDepalettizeShader(GEBufferFormat pixelFormat);

	// Returns the palette texture cached under clutHash, creating it from
	// rawClut on first use.
	GLuint GetClutTexture(const u32 clutHash, u32 *rawClut);

	// Deletes every cached shader, program and palette texture.
	void Clear();

	// Currently a no-op (eviction is not implemented yet).
	void Decimate();

private:
	// Computes the key used for cache_ from the CLUT state and pixel format.
	u32 GenerateShaderID(GEBufferFormat pixelFormat);

	// Vertex shader shared by all cached programs.
	GLuint vertexShader_;
	// Shader ID -> owned program entry.
	std::map<u32, DepalShader *> cache_;
	// CLUT hash -> owned palette texture entry.
	std::map<u32, DepalTexture *> texCache_;
};

View File

@ -86,6 +86,7 @@ struct VirtualFramebuffer {
GEBufferFormat format; // virtual, right now they are all RGBA8888
FBOColorDepth colorDepth;
FBO *fbo;
FBO *depalFBO;
bool dirtyAfterDisplay;
bool reallyDirtyAfterDisplay; // takes frame skipping into account

View File

@ -410,6 +410,7 @@ GLES_GPU::GLES_GPU()
framebufferManager_.SetTextureCache(&textureCache_);
framebufferManager_.SetShaderManager(shaderManager_);
textureCache_.SetFramebufferManager(&framebufferManager_);
textureCache_.SetDepalShaderCache(&depalShaderCache_);
// Sanity check gstate
if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) {

View File

@ -26,6 +26,7 @@
#include "GPU/GLES/Framebuffer.h"
#include "GPU/GLES/TransformPipeline.h"
#include "GPU/GLES/TextureCache.h"
#include "GPU/GLES/DepalettizeShader.h"
class ShaderManager;
class LinkedShader;
@ -162,6 +163,7 @@ private:
FramebufferManager framebufferManager_;
TextureCache textureCache_;
DepalShaderCache depalShaderCache_;
TransformDrawEngine transformDraw_;
ShaderManager *shaderManager_;

View File

@ -27,6 +27,7 @@
#include "GPU/GLES/TextureCache.h"
#include "GPU/GLES/Framebuffer.h"
#include "GPU/GLES/FragmentShaderGenerator.h"
#include "GPU/GLES/DepalettizeShader.h"
#include "GPU/Common/TextureDecoder.h"
#include "Core/Config.h"
#include "Core/Host.h"
@ -117,6 +118,9 @@ void TextureCache::Decimate() {
for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) {
if (iter->second.lastFrame + killAge < gpuStats.numFlips) {
glDeleteTextures(1, &iter->second.texture);
if (iter->second.depalFBO) {
fbo_destroy(iter->second.depalFBO);
}
cache.erase(iter++);
} else {
++iter;
@ -128,6 +132,9 @@ void TextureCache::Decimate() {
// In low memory mode, we kill them all.
if (lowMemoryMode_ || iter->second.lastFrame + TEXTURE_SECOND_KILL_AGE < gpuStats.numFlips) {
glDeleteTextures(1, &iter->second.texture);
if (iter->second.depalFBO) {
fbo_destroy(iter->second.depalFBO);
}
secondCache.erase(iter++);
} else {
++iter;
@ -214,7 +221,18 @@ inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer)
}
}
inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) {
// Tries to attach |framebuffer| to |entry| as a palettized (CLUT) source.
// Succeeds only when the depal shader cache can supply a program for the
// framebuffer's format; in that case the entry is linked to the framebuffer
// and flagged STATUS_DEPALETTIZE. Returns whether the attach happened.
// |address| is accepted for symmetry with AttachFramebuffer but is not used.
bool TextureCache::AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address) {
	const GLuint depalProgram = depalShaderCache_->GetDepalettizeShader(framebuffer->format);
	if (!depalProgram) {
		// No usable shader: leave the entry untouched.
		return false;
	}

	entry->framebuffer = framebuffer;
	entry->invalidHint = -1;
	entry->status |= TexCacheEntry::STATUS_DEPALETTIZE;
	return true;
}
void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) {
// If they match exactly, it's non-CLUT and from the top left.
if (exactMatch) {
// Apply to non-buffered and buffered mode only.
@ -237,22 +255,31 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE))
return;
// 3rd Birthday (and possibly other games) render to a 16 bit clut texture.
const bool compatFormat = framebuffer->format == entry->format
|| (framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32)
|| (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
bool clutSuccess = false;
if (((framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16))) {
clutSuccess = AttachFramebufferCLUT(entry, framebuffer, address);
}
// Is it at least the right stride?
if (framebuffer->fb_stride == entry->bufw && compatFormat) {
if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
// TODO: Use an FBO to translate the palette?
AttachFramebufferValid(entry, framebuffer);
} else if ((entry->addr - address) / entry->bufw < framebuffer->height) {
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address);
// TODO: Keep track of the y offset.
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
AttachFramebufferInvalid(entry, framebuffer);
if (!clutSuccess) {
// This is either normal or we failed to generate a shader to depalettize
const bool compatFormat = framebuffer->format == entry->format ||
(framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) ||
(framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
// Is it at least the right stride?
if (framebuffer->fb_stride == entry->bufw && compatFormat) {
if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address);
// TODO: Use an FBO to translate the palette?
AttachFramebufferValid(entry, framebuffer);
} else if ((entry->addr - address) / entry->bufw < framebuffer->height) {
WARN_LOG_REPORT_ONCE(subarea, G3D, "Render to area containing texture at %08x", address);
// TODO: Keep track of the y offset.
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
AttachFramebufferInvalid(entry, framebuffer);
}
}
}
}
@ -723,6 +750,7 @@ static inline u32 MiniHash(const u32 *ptr) {
return ptr[0];
}
// TODO: Unused, remove?
static inline u32 QuickClutHash(const u8 *clut, u32 bytes) {
// CLUTs always come in multiples of 32 bytes, can't load them any other way.
_dbg_assert_msg_(G3D, (bytes & 31) == 0, "CLUT should always have a multiple of 32 bytes.");
@ -889,6 +917,28 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry) {
if (useBufferedRendering) {
framebufferManager_->BindFramebufferColor(entry->framebuffer);
if (entry->status & TexCacheEntry::STATUS_DEPALETTIZE) {
GLuint program = depalShaderCache_->GetDepalettizeShader(entry->framebuffer->format);
glUseProgram(program);
// Check if we can handle the current setup
GLuint clutTexture = depalShaderCache_->GetClutTexture(clutHash_, clutBufConverted_);
glActiveTexture(1);
glBindTexture(GL_TEXTURE_2D, clutTexture);
glActiveTexture(0);
if (!entry->depalFBO) {
entry->depalFBO = fbo_create(entry->framebuffer->bufferWidth, entry->framebuffer->bufferHeight, 1, false, FBO_8888);
}
fbo_bind_as_render_target(entry->depalFBO);
// ...
fbo_bind_color_as_texture(entry->depalFBO, 0);
}
// Keep the framebuffer alive.
entry->framebuffer->last_frame_used = gpuStats.numFlips;
@ -1149,6 +1199,7 @@ void TextureCache::SetTexture(bool force) {
entry->framebuffer = 0;
entry->maxLevel = maxLevel;
entry->lodBias = 0.0f;
entry->depalFBO = 0;
entry->dim = gstate.getTextureDimension(0);
entry->bufw = bufw;

View File

@ -25,6 +25,7 @@
struct VirtualFramebuffer;
class FramebufferManager;
class DepalShaderCache;
enum TextureFiltering {
AUTO = 1,
@ -60,6 +61,9 @@ public:
void SetFramebufferManager(FramebufferManager *fbManager) {
framebufferManager_ = fbManager;
}
// Stores the depalettize shader cache this texture cache should use.
// Only the pointer is kept.
void SetDepalShaderCache(DepalShaderCache *dpCache) {
depalShaderCache_ = dpCache;
}
size_t NumLoadedTextures() const {
return cache.size();
@ -93,6 +97,8 @@ private:
STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 15 frames in between.)
STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.
STATUS_DEPALETTIZE = 0x40,
STATUS_DEPALETTIZE_DIRTY = 0x80
};
// Status, but int so we can zero initialize.
@ -100,6 +106,7 @@ private:
u32 addr;
u32 hash;
VirtualFramebuffer *framebuffer; // if null, not sourced from an FBO.
FBO *depalFBO;
u32 sizeInRAM;
int lastFrame;
int numFrames;
@ -150,6 +157,7 @@ private:
u32 GetCurrentClutHash();
void UpdateCurrentClut();
void AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch);
bool AttachFramebufferCLUT(TextureCache::TexCacheEntry *entry, VirtualFramebuffer *framebuffer, u32 address);
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer);
void SetTextureFramebuffer(TexCacheEntry *entry);
@ -184,5 +192,7 @@ private:
int decimationCounter_;
FramebufferManager *framebufferManager_;
DepalShaderCache *depalShaderCache_;
};
GLenum getClutDestFormat(GEPaletteFormat format);

View File

@ -190,6 +190,7 @@
<ClInclude Include="Directx9\VertexShaderGeneratorDX9.h" />
<ClInclude Include="ge_constants.h" />
<ClInclude Include="GeDisasm.h" />
<ClInclude Include="GLES\DepalettizeShader.h" />
<ClInclude Include="GLES\FragmentShaderGenerator.h" />
<ClInclude Include="GLES\Framebuffer.h" />
<ClInclude Include="GLES\GLES_GPU.h" />
@ -242,6 +243,7 @@
<ClCompile Include="Directx9\VertexDecoderDX9.cpp" />
<ClCompile Include="Directx9\VertexShaderGeneratorDX9.cpp" />
<ClCompile Include="GeDisasm.cpp" />
<ClCompile Include="GLES\DepalettizeShader.cpp" />
<ClCompile Include="GLES\FragmentShaderGenerator.cpp" />
<ClCompile Include="GLES\Framebuffer.cpp" />
<ClCompile Include="GLES\GLES_GPU.cpp" />

View File

@ -165,6 +165,9 @@
<ClInclude Include="Common\TransformCommon.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="GLES\DepalettizeShader.h">
<Filter>GLES</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -308,8 +311,11 @@
<ClCompile Include="Common\TransformCommon.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="GLES\DepalettizeShader.cpp">
<Filter>GLES</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>