More shadergen work

Buildfix
This commit is contained in:
Henrik Rydgård 2020-11-03 15:44:57 +01:00
parent 19b4febbbf
commit f2e315b9a6
22 changed files with 203 additions and 214 deletions

View File

@ -31,13 +31,12 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) {
fragColor0 = "fragColor0";
fragColor1 = "fragColor1";
texture = "texture";
texelFetch = nullptr;
bitwiseOps = false;
texelFetch = "texelFetch";
bitwiseOps = true;
lastFragData = nullptr;
gles = false;
gles = true;
forceMatrix4x4 = true;
glslES30 = true;
bitwiseOps = true;
texelFetch = "texelFetch";
break;
case GLSL_VULKAN:

View File

@ -1,5 +1,9 @@
#pragma once
#include <vector>
#include <cstdint>
#include <cstddef> // for size_t
// GLSL_1xx and GLSL_3xx each cover a lot of sub variants. All the little quirks
// that differ are covered in ShaderLanguageDesc.
// Defined as a bitmask so stuff like GetSupportedShaderLanguages can return combinations.
@ -47,6 +51,30 @@ struct ShaderLanguageDesc {
bool coefsFromBuffers = false;
};
// Identifies the data type of a single uniform. The underlying storage type is int8_t.
// NOTE(review): the enumerator names suggest 1-4 component float vectors and a 4x4 float
// matrix - confirm against the backends that consume these values.
enum class UniformType : int8_t {
FLOAT1,
FLOAT2,
FLOAT3,
FLOAT4,
MATRIX4X4,
};
// Describe uniforms intricately enough that we can support them on all backends.
// This will generate a uniform struct on the newer backends and individual uniforms on the older ones.
//
// This is a plain aggregate with no default member initializers, so every field must be
// supplied by whoever creates an instance.
struct UniformDesc {
const char *name; // For GL
int16_t vertexReg; // For D3D
int16_t fragmentReg; // For D3D
UniformType type; // Data type of this uniform.
int16_t offset; // NOTE(review): presumably the byte offset of this uniform inside the uniform struct - confirm.
// TODO: Support array elements etc.
};
// Describes a complete uniform buffer: its total size plus a description of each uniform in it.
struct UniformBufferDesc {
size_t uniformBufferSize; // Total size of the buffer. NOTE(review): presumably in bytes - confirm.
std::vector<UniformDesc> uniforms; // One entry per uniform contained in the buffer.
};
// For passing error messages from shader compilation (and other critical issues) back to the host.
// This can run on any thread - be aware!
// TODO: See if we can find a less generic name for this.

View File

@ -120,6 +120,7 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions
if (lang_.gles) {
C("precision highp float;\n");
}
C("#define gl_VertexIndex gl_VertexID\n");
break;
}
if (!lang_.gles) {
@ -134,18 +135,47 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions
}
// Writes everything needed to open the vertex shader's main function: the varying
// declarations and the entry point header, in the syntax of the target shader language.
//
// - HLSL (D3D11/D3D9): declares a VS_OUTPUT struct holding the position plus all varyings,
//   opens "VS_OUTPUT main(...)" and declares gl_Position and the varyings as locals, so the
//   shader body can assign to them like in GLSL. EndVSMain copies them into the struct.
// - GLSL for Vulkan: declares each varying as an "out" with an explicit location.
// - Everything else (OpenGL): declares each varying using lang_.varying_vs as the qualifier.
//
// inputs and uniforms are accepted for interface symmetry but are not used yet.
// Must only be called on a writer created for the vertex stage.
void ShaderWriter::BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings) {
	_assert_(this->stage_ == ShaderStage::Vertex);
	switch (lang_.shaderLanguage) {
	case HLSL_D3D11:
	case HLSL_D3D9:
	{
		C("struct VS_OUTPUT {\n");
		C(" vec4 pos : POSITION;\n");
		for (auto &varying : varyings) {
			F(" %s %s : %s;\n", varying.type, varying.name, varying.semantic);
		}
		C("};\n");
		// The parameter list always ends with exactly two characters that Rewind(2) below
		// removes: either the ", " after the last parameter, or, when there are no
		// parameters at all (D3D9), the two padding spaces written here.
		C("VS_OUTPUT main(  "); // 2 spaces for the D3D9 rewind
		if (lang_.shaderLanguage == HLSL_D3D11) {
			C("uint gl_VertexIndex : SV_VertexID, ");
		}
		Rewind(2); // Get rid of the last comma.
		C(") {\n");
		C(" vec4 gl_Position;\n");
		for (auto &varying : varyings) {
			F(" %s %s;\n", varying.type, varying.name);
		}
		break;
	}
	case GLSL_VULKAN:
		for (auto &varying : varyings) {
			F("layout(location = %d) out %s %s; // %s\n", varying.index, varying.type, varying.name, varying.semantic);
		}
		C("void main() {\n");
		break;
	default: // OpenGL
		for (auto &varying : varyings) {
			F("%s %s %s; // %s (%d)\n", lang_.varying_vs, varying.type, varying.name, varying.semantic, varying.index);
		}
		C("void main() {\n");
		break;
	}
}
void ShaderWriter::BeginFSMain(Slice<UniformDef> uniforms, Slice<VaryingDef> varyings) {
_assert_(this->stage_ == ShaderStage::Fragment);
switch (lang_.shaderLanguage) {
case HLSL_D3D11:
if (!uniforms.is_empty()) {
@ -186,7 +216,7 @@ void ShaderWriter::BeginFSMain(Slice<UniformDef> uniforms, Slice<VaryingDef> var
break;
default:
for (auto &varying : varyings) {
F("in %s %s; // %s\n", varying.type, varying.name, varying.semantic);
F("%s %s %s; // %s\n", lang_.varying_fs, varying.type, varying.name, varying.semantic);
}
if (!strcmp(lang_.fragColor0, "fragColor0")) {
C("out vec4 fragColor0;\n");
@ -196,11 +226,27 @@ void ShaderWriter::BeginFSMain(Slice<UniformDef> uniforms, Slice<VaryingDef> var
}
}
void ShaderWriter::EndVSMain() {
void ShaderWriter::EndVSMain(Slice<VaryingDef> varyings) {
_assert_(this->stage_ == ShaderStage::Vertex);
switch (lang_.shaderLanguage) {
case HLSL_D3D11:
case HLSL_D3D9:
C(" VS_OUTPUT vs_out;\n");
C(" vs_out.pos = gl_Position;\n");
for (auto &varying : varyings) {
F(" vs_out.%s = %s;\n", varying.name, varying.name);
}
C(" return vs_out;\n");
break;
case GLSL_VULKAN:
default: // OpenGL
break;
}
C("}\n");
}
void ShaderWriter::EndFSMain(const char *vec4_color_variable) {
_assert_(this->stage_ == ShaderStage::Fragment);
switch (lang_.shaderLanguage) {
case HLSL_D3D11:
case HLSL_D3D9:
@ -254,4 +300,3 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *texName, const char *sam
}
return *this;
}

View File

@ -8,6 +8,8 @@
#include "GPU/GPUCommon.h"
#include "Common/Data/Collections/Slice.h"
#include "Common/GPU/thin3d.h"
// Helps generate a shader compatible with all backends.
//
// Can use the uniform buffer support in thin3d.
@ -72,7 +74,7 @@ public:
void BeginFSMain(Slice<UniformDef> uniforms, Slice<VaryingDef> varyings);
// For simple shaders that output a single color, we can deal with this generically.
void EndVSMain();
void EndVSMain(Slice<VaryingDef> varyings);
void EndFSMain(const char *vec4_color_variable);

View File

@ -396,7 +396,7 @@ DrawContext::~DrawContext() {
DestroyPresets();
}
// TODO: SSE/NEON
// TODO: Use the functions we have in Common/ColorConv.cpp.
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
void ConvertFromRGBA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {
// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.
@ -455,7 +455,7 @@ void ConvertFromRGBA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, u
}
}
// TODO: SSE/NEON
// TODO: Use the functions we have in Common/ColorConv.cpp.
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
void ConvertFromBGRA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {
// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.

View File

@ -385,28 +385,7 @@ struct InputLayoutDesc {
// Handle type for an input layout. The class adds no members of its own; it only inherits from RefCountedObject.
class InputLayout : public RefCountedObject { };
// Identifies the data type of a single uniform. The underlying storage type is int8_t.
// NOTE(review): the enumerator names suggest 1-4 component float vectors and a 4x4 float
// matrix - confirm against the backends that consume these values.
enum class UniformType : int8_t {
FLOAT1,
FLOAT2,
FLOAT3,
FLOAT4,
MATRIX4X4,
};
// For emulation of uniform buffers on D3D9/GL
// Describes one uniform. This is a plain aggregate with no default member initializers,
// so every field must be supplied by whoever creates an instance.
struct UniformDesc {
const char *name; // For GL
int16_t vertexReg; // For D3D
int16_t fragmentReg; // For D3D
UniformType type; // Data type of this uniform.
int16_t offset; // NOTE(review): presumably the byte offset of this uniform inside the uniform struct - confirm.
// TODO: Support array elements etc.
};
// Describes a complete uniform buffer: its total size plus a description of each uniform in it.
struct UniformBufferDesc {
size_t uniformBufferSize; // Total size of the buffer. NOTE(review): presumably in bytes - confirm.
std::vector<UniformDesc> uniforms; // One entry per uniform contained in the buffer.
};
// Uniform types have moved to Shader.h.
class ShaderModule : public RefCountedObject {
public:

View File

@ -35,6 +35,7 @@
#include "GPU/Common/PostShader.h"
#include "GPU/Common/PresentationCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/ReinterpretFramebuffer.h"
#include "GPU/Debugger/Record.h"
#include "GPU/Debugger/Stepping.h"
#include "GPU/GPUInterface.h"
@ -516,6 +517,59 @@ void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer
}
}
// Called when the color format of an existing framebuffer changes from oldFormat to
// vfb->format, so the stored image can be reinterpreted in the new format.
//
// Work in progress: this currently only prepares the objects needed for the conversion
// (the shared vertex shader, a per-format-pair pipeline and a temp framebuffer) and marks
// the relevant render state dirty. No conversion draw is issued yet.
//
// Does nothing when buffered rendering is off, when vfb has no FBO, or when either format
// is not a 16-bit color format.
void FramebufferManagerCommon::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat oldFormat) {
	if (!useBufferedRendering_ || !vfb->fbo) {
		return;
	}

	GEBufferFormat newFormat = vfb->format;
	_assert_(newFormat != oldFormat);

	// We only reinterpret between 16-bit formats, for now.
	if (!IsGeBufferFormat16BitColor(oldFormat) || !IsGeBufferFormat16BitColor(newFormat)) {
		// 16->32 and 32->16 will require some more specialized shaders.
		return;
	}

	if (!reinterpretVS_) {
		// Zero-filled scratch space for the generated source, released automatically.
		std::vector<char> buffer(4000);
		const ShaderLanguageDesc &desc = draw_->GetShaderLanguageDesc();
		// The generator can refuse (it returns false) without writing anything. Only
		// compile when it actually produced source, instead of measuring and compiling
		// whatever happens to be in the buffer.
		if (GenerateReinterpretVertexShader(buffer.data(), desc)) {
			reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, desc.shaderLanguage, (const uint8_t *)buffer.data(), strlen(buffer.data()), "reinterpret_vs");
		}
	}

	// See if we need to create a new pipeline.
	// NOTE(review): this reads reinterpretFromTo_ before anything here writes it, so the
	// array must be zeroed at construction - confirm in the constructor.
	if (!reinterpretFromTo_[(int)oldFormat][(int)newFormat]) {
		// TODO: No shader modules are attached yet.
		std::vector<Draw::ShaderModule *> shaders;

		using namespace Draw;
		// We use a "fullscreen triangle".
		InputLayoutDesc inputDesc{};  // No inputs, we generate it in the shader.
		InputLayout *inputLayout = draw_->CreateInputLayout(inputDesc);
		DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS });
		BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF });
		RasterState *rasterNoCull = draw_->CreateRasterState({});

		// No uniforms for these, only a single texture input.
		PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, inputLayout, depth, blendstateOff, rasterNoCull, nullptr };
		Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);

		// Remember the pipeline so it is built once per format pair rather than on every
		// call. A null result is stored as-is, which simply causes a retry next time.
		// NOTE(review): the cached pipelines need a matching Release() on teardown.
		reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline;

		// The state objects were only needed to build the pipeline, so drop our references.
		inputLayout->Release();
		depth->Release();
		blendstateOff->Release();
		rasterNoCull->Release();
	}

	// Copy to a temp framebuffer.
	// TODO: The temp framebuffer is fetched but not used yet.
	Draw::Framebuffer *temp = GetTempFBO(TempFBO::COPY, vfb->renderWidth, vfb->renderHeight);
	(void)temp;

	shaderManager_->DirtyLastShader();
	textureCache_->ForgetLastTexture();
	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
}
void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) {
ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height);

View File

@ -189,7 +189,7 @@ class TextureCacheCommon;
class FramebufferManagerCommon {
public:
explicit FramebufferManagerCommon(Draw::DrawContext *draw);
FramebufferManagerCommon(Draw::DrawContext *draw);
virtual ~FramebufferManagerCommon();
virtual void Init();
@ -344,7 +344,7 @@ protected:
void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged);
void NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth);
virtual void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) = 0;
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old);
void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst);
void ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force = false, bool skipCopy = false);
@ -428,4 +428,10 @@ protected:
FBO_OLD_AGE = 5,
FBO_OLD_USAGE_FLAG = 15,
};
// Thin3D stuff for reinterpreting image data between the various 16-bit formats.
// Safe, not optimal - there might be input attachment tricks, etc, but we can't use them
// since we don't want N different implementations.
Draw::Pipeline *reinterpretFromTo_[3][3];
Draw::ShaderModule *reinterpretVS_ = nullptr;
};

View File

@ -253,13 +253,13 @@ bool PresentationCommon::BuildPostShader(const ShaderInfo *shaderInfo, const Sha
return false;
}
Draw::UniformBufferDesc postShaderDesc{ sizeof(PostShaderUniforms), {
{ "gl_HalfPixel", 0, -1, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, gl_HalfPixel) },
{ "u_texelDelta", 1, 1, Draw::UniformType::FLOAT2, offsetof(PostShaderUniforms, texelDelta) },
{ "u_pixelDelta", 2, 2, Draw::UniformType::FLOAT2, offsetof(PostShaderUniforms, pixelDelta) },
{ "u_time", 3, 3, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, time) },
{ "u_setting", 4, 4, Draw::UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) },
{ "u_video", 5, 5, Draw::UniformType::FLOAT1, offsetof(PostShaderUniforms, video) },
UniformBufferDesc postShaderDesc{ sizeof(PostShaderUniforms), {
{ "gl_HalfPixel", 0, -1, UniformType::FLOAT4, offsetof(PostShaderUniforms, gl_HalfPixel) },
{ "u_texelDelta", 1, 1, UniformType::FLOAT2, offsetof(PostShaderUniforms, texelDelta) },
{ "u_pixelDelta", 2, 2, UniformType::FLOAT2, offsetof(PostShaderUniforms, pixelDelta) },
{ "u_time", 3, 3, UniformType::FLOAT4, offsetof(PostShaderUniforms, time) },
{ "u_setting", 4, 4, UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) },
{ "u_video", 5, 5, UniformType::FLOAT1, offsetof(PostShaderUniforms, video) },
} };
Draw::Pipeline *pipeline = CreatePipeline({ vs, fs }, true, &postShaderDesc);
if (!pipeline)
@ -366,7 +366,7 @@ void PresentationCommon::DeviceRestore(Draw::DrawContext *draw) {
CreateDeviceObjects();
}
Draw::Pipeline *PresentationCommon::CreatePipeline(std::vector<Draw::ShaderModule *> shaders, bool postShader, const Draw::UniformBufferDesc *uniformDesc) {
Draw::Pipeline *PresentationCommon::CreatePipeline(std::vector<Draw::ShaderModule *> shaders, bool postShader, const UniformBufferDesc *uniformDesc) {
using namespace Draw;
Semantic pos = SEM_POSITION;

View File

@ -110,7 +110,7 @@ protected:
void ShowPostShaderError(const std::string &errorString);
Draw::ShaderModule *CompileShaderModule(ShaderStage stage, ShaderLanguage lang, const std::string &src, std::string *errorString);
Draw::Pipeline *CreatePipeline(std::vector<Draw::ShaderModule *> shaders, bool postShader, const Draw::UniformBufferDesc *uniformDesc);
Draw::Pipeline *CreatePipeline(std::vector<Draw::ShaderModule *> shaders, bool postShader, const UniformBufferDesc *uniformDesc);
bool BuildPostShader(const ShaderInfo *shaderInfo, const ShaderInfo *next);
bool AllocateFramebuffer(int w, int h);

View File

@ -1,8 +1,13 @@
#include <cstdarg>
#include "Common/GPU/Shader.h"
#include "Common/GPU/ShaderWriter.h"
#include "GPU/Common/ReinterpretFramebuffer.h"
// The single varying used by the shaders generated in this file: a vec2 named
// v_texcoord with the semantic TEXCOORD0. Any further VaryingDef fields are left to
// their value-initialized defaults.
static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", "TEXCOORD0" },
};
// TODO: We could have an option to preserve any extra color precision. But gonna start without it.
// Requires full size integer math.
bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) {
@ -15,10 +20,6 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff
writer.DeclareSampler2D("samp", 0);
writer.DeclareTexture2D("tex", 0);
static const VaryingDef varyings[1] = {
{ "vec4", "v_texcoord", "TEXCOORD0" },
};
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings);
writer.C(" vec4 val = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n");
@ -62,3 +63,20 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff
writer.EndFSMain("outColor");
return true;
}
// Writes the source of a vertex shader that produces a "fullscreen triangle" into buffer.
//
// The vertex position is derived purely from gl_VertexIndex using bit operations, so no
// vertex inputs are declared. v_texcoord is the position remapped from [-1, 1] to [0, 1].
//
// Returns false, leaving buffer untouched, when the target language lacks bitwise
// operations; returns true once the shader has been written.
bool GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang) {
	const bool supported = lang.bitwiseOps;
	if (supported) {
		ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0);

		writer.BeginVSMain(Slice<InputDef>::empty(), Slice<UniformDef>::empty(), varyings);

		// Bit 0 of the index selects x, bit 1 selects y; each ends up as -1.0 or 3.0.
		writer.C(" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n")
			.C(" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n")
			.C(" v_texcoord = (vec2(x, y) + vec2(1.0, 1.0)) * 0.5;\n")
			.C(" gl_Position = vec4(x, y, 0.0, 1.0);\n");

		writer.EndVSMain(varyings);
	}
	return supported;
}

View File

@ -6,3 +6,7 @@
#include "Common/GPU/ShaderWriter.h"
bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang);
// Just a single one. Can probably be shared with a lot of similar use cases.
// Generates the coordinates for a fullscreen triangle.
bool GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang);

View File

@ -252,43 +252,6 @@ void FramebufferManagerD3D11::Bind2DShader() {
context_->VSSetShader(quadVertexShader_, 0, 0);
}
// D3D11 handling of a framebuffer whose color format has changed from "old".
//
// Does nothing unless buffered rendering is on, vfb has an FBO and the old format was 565.
// In that case a quad is drawn over the whole framebuffer with only alpha writes enabled
// and the stencil cleared by the render pass.
void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) {
	if (!useBufferedRendering_) {
		return;
	}
	if (!vfb->fbo) {
		return;
	}

	// Strictly speaking, the bytes stored in the old format ought to be re-interpreted in the
	// new one here. That is tricky and might slow some games down needlessly.
	// So for now only alpha/stencil is cleared when coming from 565, which is enough to fix
	// the shadows in Kingdom Hearts (it writes zeros using 565 and then draws the shadow
	// using 4444.)
	//
	// Ultimately the best approach may be to make a new FBO (perhaps combined with a resize),
	// blit into it with a shader and swap it in on vfb. Exactly reproducing stencil in the
	// 4444 and 8888 formats would remain complicated.
	if (old != GE_FORMAT_565) {
		return;
	}

	draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer");

	// Output merger and rasterizer: no depth/stencil test, alpha-only color writes, no culling.
	context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0xFF);
	context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[D3D11_COLOR_WRITE_ENABLE_ALPHA], nullptr, 0xFFFFFFFF);
	context_->RSSetState(stockD3D11.rasterStateNoCull);

	// Quad geometry, shaders and the null texture.
	context_->IASetInputLayout(quadInputLayout_);
	context_->PSSetShader(quadPixelShader_, nullptr, 0);
	context_->VSSetShader(quadVertexShader_, nullptr, 0);
	context_->IASetVertexBuffers(0, 1, &fsQuadBuffer_, &quadStride_, &quadOffset_);
	context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DClamp);
	context_->PSSetShaderResources(0, 1, &nullTextureView_);
	shaderManagerD3D11_->DirtyLastShader();

	// Cover the full render-resolution surface.
	D3D11_VIEWPORT fullViewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
	context_->RSSetViewports(1, &fullViewport);
	context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	context_->Draw(4, 0);

	// We changed state behind the back of the caches, so make them re-apply it.
	textureCache_->ForgetLastTexture();
	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
}
static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) {
size_t x = 0;

View File

@ -42,7 +42,6 @@ public:
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) override;
void EndFrame();
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override;
virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override;

View File

@ -228,83 +228,6 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
device_->SetVertexShader(pFramebufferVertexShader);
}
// D3D9 handling of a framebuffer whose color format has changed from "old".
//
// Does nothing unless buffered rendering is on, vfb has an FBO and the old format was 565.
// In that case a quad is drawn over the whole framebuffer with only alpha writes enabled
// and the stencil cleared by the render pass. A failed draw is logged, not propagated.
void FramebufferManagerDX9::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) {
	if (!useBufferedRendering_) {
		return;
	}
	if (!vfb->fbo) {
		return;
	}

	// Strictly speaking, the bytes stored in the old format ought to be re-interpreted in the
	// new one here. That is tricky and might slow some games down needlessly.
	// So for now only alpha/stencil is cleared when coming from 565, which is enough to fix
	// the shadows in Kingdom Hearts (it writes zeros using 565 and then draws the shadow
	// using 4444.)
	//
	// Ultimately the best approach may be to make a new FBO (perhaps combined with a resize),
	// blit into it with a shader and swap it in on vfb. Exactly reproducing stencil in the
	// 4444 and 8888 formats would remain complicated.
	if (old != GE_FORMAT_565) {
		return;
	}

	draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer");

	// No scissor, no depth writes, alpha-only color writes, stencil always passes.
	dxstate.scissorTest.disable();
	dxstate.depthWrite.set(FALSE);
	dxstate.colorMask.set(false, false, false, true);
	dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0, 0);
	dxstate.stencilMask.set(0xFF);
	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);

	// Four vertices of five floats each, spanning -1..1 in x and y.
	float quadVerts[20] = {
		-1.0f,-1.0f,0, 0,0,
		1.0f,-1.0f,0, 0,0,
		1.0f,1.0f,0, 0,0,
		-1.0f,1.0f,0, 0,0,
	};

	dxstate.cullMode.set(false, false);
	device_->SetVertexDeclaration(pFramebufferVertexDecl);
	device_->SetPixelShader(pFramebufferPixelShader);
	device_->SetVertexShader(pFramebufferVertexShader);
	shaderManagerDX9_->DirtyLastShader();
	device_->SetTexture(0, nullTex_);

	// Cover the full render-resolution surface.
	D3DVIEWPORT9 fullViewport{ 0, 0, (DWORD)vfb->renderWidth, (DWORD)vfb->renderHeight, 0.0f, 1.0f };
	device_->SetViewport(&fullViewport);

	// This should clear stencil and alpha without changing the other colors.
	HRESULT drawResult = device_->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, quadVerts, 5 * sizeof(float));
	if (FAILED(drawResult)) {
		ERROR_LOG_REPORT(G3D, "ReformatFramebufferFrom() failed: %08x", drawResult);
	}

	dxstate.viewport.restore();
	textureCache_->ForgetLastTexture();
}
// Copies part of each of the c 32-bit elements from srcp into dstp, leaving the rest of
// every destination element as it was.
//
// With SSE, elements are handled four at a time: the low 24 bits come from the source and
// the top 8 bits of the destination are kept. The aligned load/store intrinsics are used,
// so both pointers must be 16-byte aligned in that build. Leftover elements (or all of
// them without SSE) get their first three bytes in memory copied from the source.
static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) {
	size_t i = 0;

#ifdef _M_SSE
	const size_t vectorized = (c / 4) * 4;
	const __m128i low24Mask = _mm_set1_epi32(0x00FFFFFF);
	const __m128i high8Mask = _mm_set1_epi32(0xFF000000);
	__m128i *dstVec = (__m128i *)dstp;
	const __m128i *srcVec = (const __m128i *)srcp;

	while (i < vectorized) {
		const __m128i fromSrc = _mm_and_si128(_mm_load_si128(srcVec), low24Mask);
		const __m128i fromDst = _mm_and_si128(_mm_load_si128(dstVec), high8Mask);
		_mm_store_si128(dstVec, _mm_or_si128(fromSrc, fromDst));
		++dstVec;
		++srcVec;
		i += 4;
	}
#endif

	// Copy the remaining pixels that didn't fit in SSE.
	while (i < c) {
		memcpy(&dstp[i], &srcp[i], 3);
		++i;
	}
}
LPDIRECT3DSURFACE9 FramebufferManagerDX9::GetOffscreenSurface(LPDIRECT3DSURFACE9 similarSurface, VirtualFramebuffer *vfb) {
D3DSURFACE_DESC desc = {};
HRESULT hr = similarSurface->GetDesc(&desc);

View File

@ -47,7 +47,6 @@ public:
void DestroyAllFBOs();
void EndFrame();
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override;
virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override;

View File

@ -242,27 +242,6 @@ void FramebufferManagerGLES::DrawActiveTexture(float x, float y, float w, float
}
}
// OpenGL handling of a framebuffer whose color format has changed from "old".
//
// Does nothing unless buffered rendering is on, vfb has an FBO and the old format was 565.
// In that case vfb is bound as the render target and a masked color clear is issued.
void FramebufferManagerGLES::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) {
	if (!useBufferedRendering_) {
		return;
	}
	if (!vfb->fbo) {
		return;
	}

	// Strictly speaking, the bytes stored in the old format ought to be re-interpreted in the
	// new one here. That is tricky and might slow some games down needlessly.
	// So for now only alpha/stencil is cleared when coming from 565, which is enough to fix
	// the shadows in Kingdom Hearts (it writes zeros using 565 and then draws the shadow
	// using 4444.)
	//
	// Ultimately the best approach may be to make a new FBO (perhaps combined with a resize),
	// blit into it with a shader and swap it in on vfb. Exactly reproducing stencil in the
	// 4444 and 8888 formats would remain complicated.
	if (old != GE_FORMAT_565) {
		return;
	}

	// Clear alpha and stencil.
	draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer");
	render_->Clear(0, 0.0f, 0, GL_COLOR_BUFFER_BIT, 0x8, 0, 0, 0, 0);
}
void FramebufferManagerGLES::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
_assert_msg_(nvfb->fbo, "Expecting a valid nvfb in UpdateDownloadTempBuffer");

View File

@ -50,8 +50,6 @@ public:
void DeviceLost() override;
void DeviceRestore(Draw::DrawContext *draw) override;
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override;
bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override;
bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override;

View File

@ -242,32 +242,6 @@ int FramebufferManagerVulkan::GetLineWidth() {
}
}
// Vulkan handling of a framebuffer whose color format has changed from "old".
// This also binds vfb as the current render target.
//
// Does nothing unless buffered rendering is on, vfb has an FBO and the old format was 565.
void FramebufferManagerVulkan::ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) {
	if (!useBufferedRendering_) {
		return;
	}
	if (!vfb->fbo) {
		return;
	}

	// Strictly speaking, the bytes stored in the old format ought to be re-interpreted in the
	// new one here. That is tricky and might slow some games down needlessly.
	// So for now only alpha/stencil is cleared when coming from 565, which is enough to fix
	// the shadows in Kingdom Hearts (it writes zeros using 565 and then draws the shadow
	// using 4444.)
	//
	// Ultimately the best approach may be to make a new FBO (perhaps combined with a resize),
	// blit into it with a shader and swap it in on vfb. Exactly reproducing stencil in the
	// 4444 and 8888 formats would remain complicated.
	if (old != GE_FORMAT_565) {
		return;
	}

	// A bind is required here rather than a plain clear, because there may be no
	// framebuffer bound at this point. The backend can sometimes directly optimize it to a clear.
	draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "ReformatFramebuffer");

	// Anything with command buffer dynamic state has to be dirtied, since the bind above may
	// have started a new pass. Should find a way to feed that information back, maybe...
	// Or simply correct the issue in the rendermanager.
	gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE);
}
// Intentionally a no-op in the Vulkan backend; nvfb is not used.
void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
// Nothing to do here.
}

View File

@ -53,7 +53,6 @@ public:
void DeviceRestore(Draw::DrawContext *draw) override;
int GetLineWidth();
void ReformatFramebufferFrom(VirtualFramebuffer *vfb, GEBufferFormat old) override;
bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override;

View File

@ -287,6 +287,9 @@ enum GEBufferFormat
};
const char *GeBufferFormatToString(GEBufferFormat fmt);
// Returns true when fmt is one of the 16-bit color formats, determined by checking that
// its numeric value is below 3.
// NOTE(review): this relies on the 16-bit color formats being the first three enumerators
// of GEBufferFormat - confirm against the enum definition.
inline bool IsGeBufferFormat16BitColor(GEBufferFormat fmt) {
	const int formatValue = (int)fmt;
	return formatValue < 3;
}
#define GE_VTYPE_TRANSFORM (0<<23)
#define GE_VTYPE_THROUGH (1<<23)

View File

@ -169,6 +169,23 @@ bool TestReinterpretShaders() {
// Generate all despite failures - it's only 6.
bool failed = false;
for (int k = 0; k < ARRAY_SIZE(languages); k++) {
ShaderLanguageDesc desc(languages[k]);
if (!GenerateReinterpretVertexShader(buffer, desc)) {
printf("Failed!\n%s\n", buffer);
failed = true;
} else {
std::string errorMessage;
if (!TestCompileShader(buffer, languages[k], true, &errorMessage)) {
printf("Error compiling fragment shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str());
failed = true;
return false;
} else {
printf("===\n%s\n===\n", buffer);
}
}
}
for (int k = 0; k < ARRAY_SIZE(languages); k++) {
printf("=== %s ===\n\n", ShaderLanguageToString(languages[k]));