Initial implementation of 32/16-bit color reinterpret blits.

This commit is contained in:
Henrik Rydgård 2022-08-26 12:16:56 +02:00
parent afb90f6b04
commit 3c5ec25f61
8 changed files with 102 additions and 44 deletions

View File

@ -85,6 +85,12 @@ struct UniformBufferDesc {
std::vector<UniformDesc> uniforms;
};
// Describes one shader uniform as a (type, name, index) triple.
// Instances are written as positional brace initialisers, e.g. { "vec2", "texSize", 0 },
// so the member order below must not change.
struct UniformDef {
const char *type;  // Shader-language type name as text, e.g. "vec2" or "float".
const char *name;  // Identifier of the uniform inside the shader, e.g. "texSize".
int index;  // NOTE(review): presumably the slot/position of the uniform in its table - confirm against the shader writer.
};
struct SamplerDef {
const char *name;
// TODO: Might need unsigned samplers, 3d samplers, or other types in the future.

View File

@ -22,12 +22,6 @@ struct InputDef {
int semantic;
};
struct UniformDef {
const char *type;
const char *name;
int index;
};
struct VaryingDef {
const char *type;
const char *name;

View File

@ -40,7 +40,7 @@ static const SamplerDef samplers[1] = {
{ "tex" },
};
// Uniform table for the Draw2D fragment shaders: texSize (vec2, index 0) and
// scaleFactor (float, index 1). The g_draw2Duniforms spelling is non-static and has
// a matching extern declaration, so it can be referenced from other translation units.
static const UniformDef uniforms[2] = {
const UniformDef g_draw2Duniforms[2] = {
{ "vec2", "texSize", 0 },
{ "float", "scaleFactor", 1},
};
@ -102,7 +102,7 @@ Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {
Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_WRITEDEPTH);
writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
// Unlike when just copying a depth buffer, here we're generating new depth values so we'll
// have to apply the scaling.

View File

@ -36,6 +36,8 @@ struct Draw2DPipelineInfo {
Slice<SamplerDef> samplers;
};
extern const UniformDef g_draw2Duniforms[2];
struct Draw2DPipeline {
Draw::Pipeline *pipeline;
Draw2DPipelineInfo info;

View File

@ -725,33 +725,30 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
}
if (IsBufferFormat16Bit(src->fb_format) && !IsBufferFormat16Bit(dst->fb_format)) {
WARN_LOG_N_TIMES(i16to32, 50, G3D, "16-bit to 32-bit reinterpret needed: %s to %s", GeBufferFormatToString(src->fb_format), GeBufferFormatToString(dst->fb_format));
// We halve the X coordinates in the destination framebuffer.
// The shader will collect two pixels worth of input data and merge into one.
dstX1 *= 0.5f;
dstX2 *= 0.5f;
} else if (!IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
WARN_LOG_N_TIMES(i32to16, 50, G3D, "32-bit to 16-bit reinterpret needed: %s to %s", GeBufferFormatToString(src->fb_format), GeBufferFormatToString(dst->fb_format));
// We double the X coordinates in the destination framebuffer.
// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
dstX1 *= 2.0f;
dstX2 *= 2.0f;
}
if (IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
// Reinterpret!
WARN_LOG_N_TIMES(reint, 20, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
src->fb_address, GeBufferFormatToString(src->fb_format),
dst->fb_address, GeBufferFormatToString(dst->fb_format));
pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
if (!pipeline) {
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
});
reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
}
// Reinterpret!
WARN_LOG_N_TIMES(reint, 20, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
src->fb_address, GeBufferFormatToString(src->fb_format),
dst->fb_address, GeBufferFormatToString(dst->fb_format));
pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
if (!pipeline) {
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
});
reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
}
gpuStats.numReinterpretCopies++;
} else if (IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
// Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.
@ -2575,8 +2572,8 @@ void FramebufferManagerCommon::DeviceLost() {
presentation_->DeviceLost();
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
DoRelease(reinterpretFromTo_[i][j]);
}
}

View File

@ -533,10 +533,10 @@ protected:
FBO_OLD_USAGE_FLAG = 15,
};
// Thin3D stuff for reinterpreting image data between the various 16-bit formats.
// Thin3D stuff for reinterpreting image data between the various 16-bit color formats.
// Safe, not optimal - there might be input attachment tricks, etc, but we can't use them
// since we don't want N different implementations.
Draw2DPipeline *reinterpretFromTo_[3][3]{};
Draw2DPipeline *reinterpretFromTo_[4][4]{};
// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
// critical either.

View File

@ -25,106 +25,161 @@ Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBuf
writer.DeclareSamplers(samplers);
// Emit the shader helper packColor(): converts a sampled, normalized color in the
// source framebuffer format (`from`) into the raw 16-bit value those channels encode.
//
// Every case writes a complete function (signature, body, closing brace) because the
// signature is not the same for all formats: a GE_FORMAT_8888 source packs only two
// 8-bit channels at a time (vec2 parameter), the 16-bit formats take the full vec4.
// Channels are scaled by (max + 0.99) and truncated, so 1.0 maps to the maximum
// channel value without overflowing into the next bit field.
if (writer.Lang().bitwiseOps) {
	// Integer path: result is a uint built with shifts and ORs.
	switch (from) {
	case GE_FORMAT_4444:
		writer.C("uint packColor(vec4 val) {\n");
		writer.C(" return uint(val.r * 15.99) | (uint(val.g * 15.99) << 4u) | (uint(val.b * 15.99) << 8u) | (uint(val.a * 15.99) << 12u);\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_5551:
		writer.C("uint packColor(vec4 val) {\n");
		writer.C(" uint color = uint(val.r * 31.99) | (uint(val.g * 31.99) << 5u) | (uint(val.b * 31.99) << 10u);\n");
		writer.C(" if (val.a >= 0.5) color |= 0x8000U;\n");
		writer.C(" return color;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_565:
		writer.C("uint packColor(vec4 val) {\n");
		writer.C(" return uint(val.r * 31.99) | (uint(val.g * 63.99) << 5u) | (uint(val.b * 31.99) << 11u);\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_8888:
		// Two 8-bit channels form one 16-bit half of the 32-bit pixel.
		writer.C("uint packColor(vec2 val) {\n");
		writer.C(" return uint(val.r * 255.99) | (uint(val.g * 255.99) << 8u);\n");
		writer.C("}\n");
		break;
	default:
		_assert_(false);
		break;
	}
} else {
	// Floating point can comfortably represent integers up to 16 million, we only need 65536 since these textures are 16-bit.
	// Shifts are replaced by multiplications with powers of two, ORs by additions.
	switch (from) {
	case GE_FORMAT_4444:
		writer.C("float packColor(vec4 val) {\n");
		writer.C(" return (floor(val.r * 15.99) + floor(val.g * 15.99) * 16.0) + (floor(val.b * 15.99) * 256.0 + floor(val.a * 15.99) * 4096.0);\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_5551:
		writer.C("float packColor(vec4 val) {\n");
		writer.C(" float color = floor(val.r * 31.99) + floor(val.g * 31.99) * 32.0 + floor(val.b * 31.99) * 1024.0;\n");
		writer.C(" if (val.a >= 0.5) color += 32768.0;\n");
		writer.C(" return color;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_565:
		writer.C("float packColor(vec4 val) {\n");
		writer.C(" return floor(val.r * 31.99) + floor(val.g * 63.99) * 32.0 + floor(val.b * 31.99) * 2048.0;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_8888:
		// Two 8-bit channels form one 16-bit half of the 32-bit pixel.
		writer.C("float packColor(vec2 val) {\n");
		writer.C(" return floor(val.r * 255.99) + floor(val.g * 255.99) * 256.0;\n");
		writer.C("}\n");
		break;
	default:
		_assert_(false);
		break;
	}
}
// Emit the shader helper unpackColor(): expands a raw 16-bit value (or, for a
// GE_FORMAT_8888 target, two of them) into a normalized vec4 in the destination
// framebuffer format (`to`).
//
// Every case writes a complete function (signature, body, return, closing brace)
// because the 8888 variant takes two arguments while the 16-bit variants take one.
// The parameter of the single-argument variants is named `color` in both the integer
// and the float path, matching the identifier used by the emitted function bodies.
if (writer.Lang().bitwiseOps) {
	// Integer path: fields are extracted with shifts and masks.
	switch (to) {
	case GE_FORMAT_4444:
		writer.C("vec4 unpackColor(uint color) {\n");
		writer.C(" vec4 outColor = vec4(float(color & 0xFU), float((color >> 4u) & 0xFU), float((color >> 8u) & 0xFU), float((color >> 12u) & 0xFU));\n");
		writer.C(" outColor *= 1.0 / 15.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_5551:
		writer.C("vec4 unpackColor(uint color) {\n");
		writer.C(" vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5u) & 0x1FU), float((color >> 10u) & 0x1FU), 0.0);\n");
		writer.C(" outColor.rgb *= 1.0 / 31.0;\n");
		writer.C(" outColor.a = float(color >> 15);\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_565:
		// No alpha bits in 565: alpha is emitted as 1.0.
		writer.C("vec4 unpackColor(uint color) {\n");
		writer.C(" vec4 outColor = vec4(float(color & 0x1FU), float((color >> 5u) & 0x3FU), float((color >> 11u) & 0x1FU), 1.0);\n");
		writer.C(" outColor.rb *= 1.0 / 31.0;\n");
		writer.C(" outColor.g *= 1.0 / 63.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_8888:
		// Two 16-bit inputs fill one 32-bit pixel: left -> r,g and right -> b,a.
		writer.C("vec4 unpackColor(uint colorLeft, uint colorRight) {\n");
		writer.C(" vec4 outColor = vec4(float(colorLeft & 0xFFu), float((colorLeft >> 8u) & 0xFFu),\n");
		writer.C(" float(colorRight & 0xFFu), float((colorRight >> 8u) & 0xFFu));\n");
		writer.C(" outColor *= 1.0 / 255.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	default:
		_assert_(false);
		break;
	}
} else {
	// Float path: shifts become divisions by powers of two, masks become mod().
	switch (to) {
	case GE_FORMAT_4444:
		writer.C("vec4 unpackColor(float color) {\n");
		writer.C(" vec4 outColor = vec4(mod(floor(color), 16.0), mod(floor(color / 16.0), 16.0),");
		writer.C(" mod(floor(color / 256.0), 16.0), mod(floor(color / 4096.0), 16.0)); \n");
		writer.C(" outColor *= 1.0 / 15.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_5551:
		writer.C("vec4 unpackColor(float color) {\n");
		writer.C(" vec4 outColor = vec4(mod(floor(color), 32.0), mod(floor(color / 32.0), 32.0), mod(floor(color / 1024.0), 32.0), 0.0);\n");
		writer.C(" outColor.rgb *= 1.0 / 31.0;\n");
		writer.C(" outColor.a = floor(color / 32768.0);\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_565:
		// No alpha bits in 565: alpha is emitted as 1.0.
		writer.C("vec4 unpackColor(float color) {\n");
		writer.C(" vec4 outColor = vec4(mod(floor(color), 32.0), mod(floor(color / 32.0), 64.0), mod(floor(color / 2048.0), 32.0), 0.0);\n");
		writer.C(" outColor.rb *= 1.0 / 31.0;\n");
		writer.C(" outColor.g *= 1.0 / 63.0;\n");
		writer.C(" outColor.a = 1.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	case GE_FORMAT_8888:
		// Two 16-bit inputs fill one 32-bit pixel: left -> r,g and right -> b,a.
		writer.C("vec4 unpackColor(float colorLeft, float colorRight) {\n");
		writer.C(" vec4 outColor = vec4(mod(floor(colorLeft), 256.0), mod(floor(colorLeft / 256.0), 256.0),\n");
		writer.C(" mod(floor(colorRight), 256.0), mod(floor(colorRight / 256.0), 256.0));\n");
		writer.C(" outColor *= 1.0 / 255.0;\n");
		writer.C(" return outColor;\n");
		writer.C("}\n");
		break;
	default:
		_assert_(false);
		break;
	}
}
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
writer.C(" vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_NONE);
if (IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
if (writer.Lang().bitwiseOps) {
writer.C("uint color = packColor(val);\n");
writer.C("vec4 outColor = unpackColor(color);\n");
}
} else {
writer.C(" vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.C(" vec4 outColor = unpackColor(packColor(val));\n");
} else if (IsBufferFormat16Bit(from) && !IsBufferFormat16Bit(to)) {
// 16-to-32-bit (two pixels, draw size is halved)
writer.C(" vec4 valLeft = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.C(" vec4 valRight = ").SampleTexture2D("tex", "v_texcoord.xy + vec2(0.5 / texSize.x, 0.0)").C(";\n");
writer.C(" vec4 outColor = unpackColor(packColor(valLeft), packColor(valRight));\n");
_assert_("not yet implemented");
} else if (!IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
// 32-to-16-bit (half of the pixel, draw size is doubled).
writer.C(" vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.C(" float u = mod(floor(v_texcoord.x * texSize.x * 2.0), 2.0);\n");
writer.C(" vec4 outColor = unpackColor(u == 0.0 ? packColor(val.rg) : packColor(val.ba));\n");
}
writer.EndFSMain("outColor", FSFLAG_NONE);

View File

@ -1149,6 +1149,10 @@ ULES01441 = true
ULJM05600 = true
ULJM05775 = true
# Spongebob - The Yellow Avenger (see #15898)
ULUS10092 = true
ULES00280 = true
[ShaderColorBitmask]
# No users right now, but keeping it around as a more accurate option than BlueToAlpha, for debugging mainly Outrun.