Start implementing ApplyTextureDepal

This commit is contained in:
Henrik Rydgård 2022-09-13 23:55:57 +02:00
parent 431f142413
commit 9907957242
5 changed files with 142 additions and 9 deletions

View File

@ -84,6 +84,9 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
int shiftedMask = mask << shift;
switch (config.bufferFormat) {
case GE_FORMAT_CLUT8:
writer.C(" int index = int(color.r * 255.99);\n");
break;
case GE_FORMAT_8888:
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
@ -168,6 +171,9 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
// pixelformat is the format of the texture we are sampling.
bool formatOK = true;
switch (config.bufferFormat) {
case GE_FORMAT_CLUT8:
_dbg_assert_(false); // to be implemented
break;
case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.

View File

@ -425,6 +425,14 @@ public:
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
Draw2DPipeline *GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX);
// Public to be used from the texture cache's depal shenanigans.
void BlitUsingRaster(
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
bool linearFilter,
int scaleFactor, // usually unused, except for swizzle...
Draw2DPipeline *pipeline, const char *tag);
protected:
virtual void PackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
void SetViewport2D(int x, int y, int w, int h);
@ -442,13 +450,6 @@ protected:
// Used by ReadFramebufferToMemory and later framebuffer block copies
void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag);
void BlitUsingRaster(
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
bool linearFilter,
int scaleFactor, // usually unused, except for swizzle...
Draw2DPipeline *pipeline, const char *tag);
void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags);
void EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height);

View File

@ -1923,7 +1923,6 @@ static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebuffe
return false;
}
void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
Draw2DPipeline *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
@ -2079,6 +2078,126 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}
void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
Draw2DPipeline *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
switch (entry->format) {
case GE_TFMT_CLUT4:
case GE_TFMT_CLUT8:
break; // These are OK
default:
_dbg_assert_(false);
return;
}
// Create GPU resources.
if (!dynamicClutFbo_) {
Draw::FramebufferDesc desc{};
desc.width = 512;
desc.height = 1;
desc.depth = 1;
desc.z_stencil = false;
desc.numColorAttachments = 1;
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
dynamicClutReinterpreted_ = draw_->CreateFramebuffer(desc);
}
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
u32 depthUpperBits = 0;
// The CLUT texture is dynamic, it's the framebuffer pointed to by clutRenderAddress.
// Instead of texturing directly from that, we copy to a temporary CLUT texture.
GEBufferFormat expectedCLUTBufferFormat = (GEBufferFormat)clutFormat; // All entries from clutFormat correspond directly to buffer formats.
VirtualFramebuffer *src = framebufferManager_->GetVFBAt(clutRenderAddress_);
if (!src) {
// What do we do?
return;
}
Draw::Framebuffer *clutFbo = dynamicClutFbo_;
// First we use a blit (with nearest interpolation, so we don't mash pixels together)
// to shrink to the correct size, if we are running with scaling.
// We can always blit 512 pixels even if we only need less, the cost will be negligible.
framebufferManager_->BlitUsingRaster(
src->fbo, 0.0f, 0.0f, 512.0f * src->renderScaleFactor, 1.0f, dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut");
// OK, figure out what format we want our framebuffer in, so it can be reinterpreted if needed.
if (expectedCLUTBufferFormat != src->fb_format) {
float scaleFactorX = 1.0f;
Draw2DPipeline *reinterpret = framebufferManager_->GetReinterpretPipeline(src->fb_format, expectedCLUTBufferFormat, &scaleFactorX);
framebufferManager_->BlitUsingRaster(
dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, 512.0f, 1.0f, false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut");
clutFbo = dynamicClutReinterpreted_;
}
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, GE_TFMT_CLUT8, GE_FORMAT_CLUT8, false, 0);
gstate_c.SetUseShaderDepal(ShaderDepalMode::OFF);
int texWidth = gstate.getTextureWidth(0);
int texHeight = gstate.getTextureHeight(0);
// If min is not < max, then we don't have values (wasn't set during decode.)
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = 1.0f;
float v2 = 1.0f;
if (bounds.minV < bounds.maxV) {
u1 = (bounds.minU + gstate_c.curTextureXOffset) * texWidth;
v1 = (bounds.minV + gstate_c.curTextureYOffset) * texHeight;
u2 = (bounds.maxU + gstate_c.curTextureXOffset) * texWidth;
v2 = (bounds.maxV + gstate_c.curTextureYOffset) * texHeight;
// We need to reapply the texture next time since we cropped UV.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, texWidth, texHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT);
draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1);
Draw::Viewport vp{ 0.0f, 0.0f, (float)texWidth, (float)texHeight, 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
draw_->BindTexture(0, framebuffer->fbo);
draw_->BindFramebufferAsTexture(clutFbo, 1, Draw::FB_COLOR_BIT, 0);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(false);
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);
draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, texWidth, texHeight, texWidth, texHeight, false, 1);
gpuStats.numDepal++;
gstate_c.curTextureWidth = texWidth;
draw_->BindTexture(0, nullptr);
framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer");
draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
BoundFramebufferTexture();
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
CheckAlphaResult alphaStatus = CheckCLUTAlpha((const uint8_t *)clutBufRaw_, clutFormat, clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
draw_->InvalidateCachedState();
shaderManager_->DirtyLastShader();
SamplerCacheKey samplerKey = GetFramebufferSamplingParams(texWidth, texHeight);
ApplySamplingParams(samplerKey);
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}
void TextureCacheCommon::Clear(bool delete_them) {
textureShaderCache_->Clear();

View File

@ -144,6 +144,8 @@ struct TexCacheEntry {
STATUS_FORCE_REBUILD = 0x2000,
STATUS_3D = 0x4000,
STATUS_CLUT_GPU = 0x8000,
};
// Status, but int so we can zero initialize.
@ -352,6 +354,7 @@ protected:
void Decimate(bool forcePressure = false);
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel);
void ApplyTextureDepal(TexCacheEntry *entry);
void HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete);
virtual void BuildTexture(TexCacheEntry *const entry) = 0;
@ -480,6 +483,10 @@ protected:
bool clutAlphaLinear_ = false;
u16 clutAlphaLinearColor_;
// Facilities for GPU depal of static textures.
Draw::Framebuffer *dynamicClutFbo_ = nullptr;
Draw::Framebuffer *dynamicClutReinterpreted_ = nullptr;
int standardScaleFactor_;
int shaderScaleFactor_ = 0;

View File

@ -102,7 +102,7 @@ ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const
int lastA = 0;
int rampLength = 0;
// Quick check for how many continouosly growing entries we have at the start.
// Quick check for how many continuously growing entries we have at the start.
// Bilinearly filtering CLUTs only really makes sense for this kind of ramp.
for (int i = 0; i < maxClutEntries; i++) {
rampLength = i;