From 71e310aff840224078426eb7af8d54f71b443b1a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 14 Sep 2014 02:21:41 -0700 Subject: [PATCH] d3d: Initial attempt to upload stencil. --- GPU/Directx9/FramebufferDX9.cpp | 14 +- GPU/Directx9/FramebufferDX9.h | 5 +- GPU/Directx9/PixelShaderGeneratorDX9.h | 3 + GPU/Directx9/StencilBufferDX9.cpp | 282 +++++++++++++++++++++++++ GPU/GLES/StencilBuffer.cpp | 2 + GPU/GPU.vcxproj | 1 + GPU/GPU.vcxproj.filters | 3 + 7 files changed, 304 insertions(+), 6 deletions(-) create mode 100644 GPU/Directx9/StencilBufferDX9.cpp diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 7572e6b8a..bcab904c0 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -123,6 +123,9 @@ namespace DX9 { FramebufferManagerDX9::FramebufferManagerDX9() : drawPixelsTex_(0), convBuf(0), + stencilUploadPS_(nullptr), + stencilUploadVS_(nullptr), + stencilUploadFailed_(false), gameUsesSequentialCopies_(false) { } @@ -137,6 +140,12 @@ namespace DX9 { it->second.surface->Release(); } delete [] convBuf; + if (stencilUploadPS_) { + stencilUploadPS_->Release(); + } + if (stencilUploadVS_) { + stencilUploadVS_->Release(); + } } static inline void ARGB8From4444(u16 c, u32 * dst) { @@ -1054,11 +1063,6 @@ namespace DX9 { return list; } - bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { - // TODO - return false; - } - void FramebufferManagerDX9::DecimateFBOs() { fbo_unbind(); currentRenderVfb_ = 0; diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index b3ec0bdf5..a788db745 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -75,7 +75,7 @@ public: std::vector GetFramebufferList(); - bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false); + virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; void DestroyFramebuf(VirtualFramebuffer *vfb); void 
ResizeFramebufFBO(VirtualFramebuffer *vfb, u16 w, u16 h, bool force = false); @@ -119,6 +119,9 @@ private: u8 *convBuf; int plainColorLoc_; + LPDIRECT3DPIXELSHADER9 stencilUploadPS_; + LPDIRECT3DVERTEXSHADER9 stencilUploadVS_; + bool stencilUploadFailed_; TextureCacheDX9 *textureCache_; ShaderManagerDX9 *shaderManager_; diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.h b/GPU/Directx9/PixelShaderGeneratorDX9.h index 4276076a5..1d9da1a5e 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.h +++ b/GPU/Directx9/PixelShaderGeneratorDX9.h @@ -58,4 +58,7 @@ bool IsColorTestTriviallyTrue(); #define CONST_PS_ALPHACOLORMASK 2 #define CONST_PS_FOGCOLOR 3 +// For stencil upload +#define CONST_PS_STENCILVALUE 4 + }; diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp new file mode 100644 index 000000000..b610d6d77 --- /dev/null +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -0,0 +1,282 @@ +// Copyright (c) 2014- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+
+#include "base/logging.h"
+
+#include "helper/dx_state.h"
+#include "helper/fbo.h"
+#include "Core/Reporting.h"
+#include "GPU/Directx9/FramebufferDX9.h"
+#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+
+namespace DX9 {
+
+// Two-step stringification so CONST_PS_STENCILVALUE is expanded before being quoted.
+#define STR_HELPER(x) #x
+#define STR(x) STR_HELPER(x)
+
+// Pixel shader used for one stencil bit per draw: it samples the uploaded
+// pixels, divides the 0-255 alpha by the 0-255 value in u_stencilValue.x, and
+// clips the pixel unless the resulting integer is odd (i.e. that bit is set).
+static const char *stencil_ps =
+"sampler tex: register(s0);\n"
+// TODO: Don't use fixed registers?  Or don't overlap?
+"float4 u_stencilValue : register(c" STR(CONST_PS_STENCILVALUE) ");\n"
+"struct PS_IN {\n"
+" float2 v_texcoord0 : TEXCOORD0;\n"
+"};\n"
+"float roundAndScaleTo255f(in float x) { return floor(x * 255.99); }\n"
+"float4 main(PS_IN In) : COLOR {\n"
+" float4 index = tex2D(tex, In.v_texcoord0);\n"
+" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(u_stencilValue.x);\n"
+" clip(fmod(floor(shifted), 2.0) - 0.99);\n"
+" return index.aaaa;\n"
+"}\n";
+
+// Pass-through vertex shader: forwards position and texcoord unchanged.
+static const char *stencil_vs =
+"struct VS_IN {\n"
+" float4 a_position : POSITION;\n"
+" float2 a_texcoord0 : TEXCOORD0;\n"
+"};\n"
+"struct VS_OUT {\n"
+" float4 position : POSITION;\n"
+" float2 v_texcoord0 : TEXCOORD0;\n"
+"};\n"
+"VS_OUT main(VS_IN In) {\n"
+" VS_OUT Out;\n"
+" Out.position = In.a_position;\n"
+" Out.v_texcoord0 = In.a_texcoord0;\n"
+" return Out;\n"
+"}\n";
+
+// Returns 1 if any 16-bit pixel in the buffer has its top bit (0x8000) set, else 0.
+// The buffer is scanned as 32-bit words, two pixels per word.
+static u8 StencilBits5551(const u8 *ptr8, u32 numPixels) {
+	const u32 *ptr = (const u32 *)ptr8;
+
+	for (u32 i = 0; i < numPixels / 2; ++i) {
+		if (ptr[i] & 0x80008000) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+// Returns the OR of the top nibble of every 16-bit pixel in the buffer (a 4-bit value).
+// The buffer is scanned as 32-bit words, two pixels per word.
+static u8 StencilBits4444(const u8 *ptr8, u32 numPixels) {
+	const u32 *ptr = (const u32 *)ptr8;
+	u32 bits = 0;
+
+	for (u32 i = 0; i < numPixels / 2; ++i) {
+		bits |= ptr[i];
+	}
+
+	// Combine the top nibble of the low half-word with that of the high half-word.
+	return ((bits >> 12) & 0xF) | (bits >> 28);
+}
+
+// Returns the OR of the top byte of every 32-bit pixel in the buffer.
+static u8 StencilBits8888(const u8 *ptr8, u32 numPixels) {
+	const u32 *ptr = (const u32 *)ptr8;
+	u32 bits = 0;
+
+	for (u32 i = 0; i < numPixels; ++i) {
+		bits |= ptr[i];
+	}
+
+	return bits >> 24;
+}
+
+// Logs and reports the compiler output from building one of the stencil upload
+// shaders.  Does nothing when errorMessage is empty.
+//   source       - shader source text that was compiled (included in the logs).
+//   errorMessage - messages produced by the compile call.
+//   success      - whether the compile call succeeded (warnings vs. error wording).
+static void ReportStencilShaderMessages(const char *source, const std::string &errorMessage, bool success) {
+	if (errorMessage.empty()) {
+		return;
+	}
+
+	if (success) {
+		ERROR_LOG(G3D, "Warnings in shader compilation!");
+	} else {
+		ERROR_LOG(G3D, "Error in shader compilation!");
+	}
+	ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str());
+	ERROR_LOG(G3D, "Shader source:\n%s", source);
+	OutputDebugStringUTF8("Messages:\n");
+	OutputDebugStringUTF8(errorMessage.c_str());
+	Reporting::ReportMessage("D3D error in shader compilation: info: %s / code: %s", errorMessage.c_str(), source);
+}
+
+// Uploads the stencil bits stored in the alpha channel of the pixels at addr
+// into the stencil buffer of the framebuffer whose address matches addr.
+//   addr     - address of the pixel data; also used to find the target framebuffer.
+//   size     - not used by this implementation.
+//   skipZero - when true and no stencil bits are set, do nothing and return false.
+// Returns true if the stencil buffer was cleared or drawn to.  Returns false if
+// no framebuffer matches, the format is 565, the data is all zero with skipZero
+// set, or the upload shaders could not be compiled.
+bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
+	if (!MayIntersectFramebuffer(addr)) {
+		return false;
+	}
+
+	// The last matching framebuffer in the list wins.
+	VirtualFramebuffer *dstBuffer = nullptr;
+	for (size_t i = 0; i < vfbs_.size(); ++i) {
+		VirtualFramebuffer *vfb = vfbs_[i];
+		if (MaskedEqual(vfb->fb_address, addr)) {
+			dstBuffer = vfb;
+		}
+	}
+	if (!dstBuffer) {
+		return false;
+	}
+
+	// values is the number of distinct stencil values the format can hold;
+	// usedBits is the OR of the stencil bits actually present in the data.
+	int values = 0;
+	u8 usedBits = 0;
+
+	switch (dstBuffer->format) {
+	case GE_FORMAT_565:
+		// Well, this doesn't make much sense.
+		return false;
+	case GE_FORMAT_5551:
+		usedBits = StencilBits5551(Memory::GetPointer(addr), dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 2;
+		break;
+	case GE_FORMAT_4444:
+		usedBits = StencilBits4444(Memory::GetPointer(addr), dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 16;
+		break;
+	case GE_FORMAT_8888:
+		usedBits = StencilBits8888(Memory::GetPointer(addr), dstBuffer->fb_stride * dstBuffer->bufferHeight);
+		values = 256;
+		break;
+	case GE_FORMAT_INVALID:
+		// Impossible.
+		break;
+	}
+
+	if (usedBits == 0) {
+		if (skipZero) {
+			// Common when creating buffers, it's already 0.  We're done.
+			return false;
+		}
+
+		// Let's not bother with the shader if it's just zero.
+		dxstate.scissorTest.disable();
+		dxstate.colorMask.set(false, false, false, true);
+		// TODO: Verify this clears only stencil/alpha.
+		pD3Ddevice->Clear(0, NULL, D3DCLEAR_TARGET | D3DCLEAR_STENCIL, D3DCOLOR_RGBA(0, 0, 0, 0), 0.0f, 0);
+		return true;
+	}
+
+	// A previous compile failure is remembered so we don't retry on every upload.
+	if (stencilUploadFailed_) {
+		return false;
+	}
+
+	// Compile the shaders on first use.
+	if (!stencilUploadPS_) {
+		std::string errorMessage;
+		bool success = CompilePixelShader(stencil_ps, &stencilUploadPS_, NULL, errorMessage);
+		ReportStencilShaderMessages(stencil_ps, errorMessage, success);
+		if (!success) {
+			if (stencilUploadPS_) {
+				stencilUploadPS_->Release();
+			}
+			stencilUploadPS_ = nullptr;
+		}
+	}
+	if (!stencilUploadVS_) {
+		std::string errorMessage;
+		bool success = CompileVertexShader(stencil_vs, &stencilUploadVS_, NULL, errorMessage);
+		ReportStencilShaderMessages(stencil_vs, errorMessage, success);
+		if (!success) {
+			if (stencilUploadVS_) {
+				stencilUploadVS_->Release();
+			}
+			stencilUploadVS_ = nullptr;
+		}
+	}
+	if (!stencilUploadPS_ || !stencilUploadVS_) {
+		stencilUploadFailed_ = true;
+		return false;
+	}
+
+	shaderManager_->DirtyLastShader();
+
+	// Write only alpha, and replace the stencil value on every drawn pixel.
+	DisableState();
+	dxstate.colorMask.set(false, false, false, true);
+	dxstate.stencilTest.enable();
+	dxstate.stencilOp.set(D3DSTENCILOP_REPLACE, D3DSTENCILOP_REPLACE, D3DSTENCILOP_REPLACE);
+
+	u16 w = dstBuffer->renderWidth;
+	u16 h = dstBuffer->renderHeight;
+
+	if (dstBuffer->fbo) {
+		fbo_bind_as_render_target(dstBuffer->fbo);
+	}
+	dxstate.viewport.set(0, 0, w, h);
+
+	MakePixelTexture(Memory::GetPointer(addr), dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight);
+
+	pD3Ddevice->Clear(0, NULL, D3DCLEAR_TARGET | D3DCLEAR_STENCIL, D3DCOLOR_RGBA(0, 0, 0, 0), 0.0f, 0);
+
+	dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0xFF, 0xFF);
+
+	// Full-buffer quad; each vertex is x, y, z, u, v.  Every corner of the quad
+	// maps to the matching corner of the texture, so the top-left vertex must
+	// use (0, 0) rather than repeating the bottom-left (0, 1).
+	float fw = dstBuffer->width;
+	float fh = dstBuffer->height;
+	float coord[20] = {
+		0.0f,0.0f,0.0f, 0.0f,0.0f,
+		fw,0.0f,0.0f, 1.0f,0.0f,
+		fw,fh,0.0f, 1.0f,1.0f,
+		0.0f,fh,0.0f, 0.0f,1.0f,
+	};
+	// Convert pixel positions to the -1..1 range, flipping Y.
+	float invDestW = 1.0f / (fw * 0.5f);
+	float invDestH = 1.0f / (fh * 0.5f);
+	for (int i = 0; i < 4; i++) {
+		coord[i * 5] = coord[i * 5] * invDestW - 1.0f;
+		coord[i * 5 + 1] = -(coord[i * 5 + 1] * invDestH - 1.0f);
+	}
+
+	pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
+
+	pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl);
+	pD3Ddevice->SetPixelShader(stencilUploadPS_);
+	pD3Ddevice->SetVertexShader(stencilUploadVS_);
+
+	pD3Ddevice->SetTexture(0, drawPixelsTex_);
+
+	shaderManager_->DirtyLastShader();
+	textureCache_->ForgetLastTexture();
+
+	// i doubles each pass, so it takes each single-bit value below `values`.
+	// One draw per bit: the stencil write mask selects the bit, and the shader
+	// constant tells the pixel shader which alpha bit to test.
+	for (int i = 1; i < values; i += i) {
+		if (!(usedBits & i)) {
+			// It's already zero, let's skip it.
+			continue;
+		}
+		if (dstBuffer->format == GE_FORMAT_4444) {
+			dxstate.stencilMask.set(Convert4To8(i));
+			const float f[4] = {i * (16.0f / 255.0f)};
+			pD3Ddevice->SetPixelShaderConstantF(CONST_PS_STENCILVALUE, f, 1);
+		} else if (dstBuffer->format == GE_FORMAT_5551) {
+			dxstate.stencilMask.set(0xFF);
+			const float f[4] = {i * (128.0f / 255.0f)};
+			pD3Ddevice->SetPixelShaderConstantF(CONST_PS_STENCILVALUE, f, 1);
+		} else {
+			dxstate.stencilMask.set(i);
+			const float f[4] = {i * (1.0f / 255.0f)};
+			pD3Ddevice->SetPixelShaderConstantF(CONST_PS_STENCILVALUE, f, 1);
+		}
+		HRESULT hr = pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float));
+		if (FAILED(hr)) {
+			ERROR_LOG_REPORT(G3D, "Failed to draw stencil bit %x: %08x", i, hr);
+		}
+	}
+	dxstate.stencilMask.set(0xFF);
+
+	RebindFramebuffer();
+	return true;
+}
+
+} // namespace DX9
diff --git a/GPU/GLES/StencilBuffer.cpp b/GPU/GLES/StencilBuffer.cpp index 44657e7c8..b7a4f1140 100644 --- a/GPU/GLES/StencilBuffer.cpp +++ b/GPU/GLES/StencilBuffer.cpp @@ -20,6 +20,7 @@ #include "Core/Reporting.h" #include "GPU/GLES/Framebuffer.h" #include "GPU/GLES/ShaderManager.h" +#include "GPU/GLES/TextureCache.h" static const char *stencil_fs = #ifdef USING_GLES2 @@ -195,6 +196,7 @@ bool FramebufferManager::NotifyStencilUpload(u32 addr, int size, bool skipZero) glViewport(0, 0, w, h); MakePixelTexture(Memory::GetPointer(addr), dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight); + textureCache_->ForgetLastTexture(); glClearStencil(0); glClear(GL_STENCIL_BUFFER_BIT); diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 059225f1a..3805dd635 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -252,6 +252,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 776f901bf..3826c43d2 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -335,6 +335,9 @@ Common + + DirectX9 +