From 40db61a6808cb7bab534d6de9010ccce7212c8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 2 Mar 2018 14:19:38 +0100 Subject: [PATCH] Remove further remains of hardware skinning. Fixes #10661 --- Core/Config.cpp | 1 - Core/Config.h | 1 - GPU/Common/DrawEngineCommon.cpp | 59 +--------- GPU/Common/ShaderCommon.h | 12 +- GPU/Common/ShaderId.cpp | 11 -- GPU/Common/ShaderId.h | 6 +- GPU/Common/ShaderUniforms.cpp | 8 -- GPU/Common/ShaderUniforms.h | 17 --- GPU/Common/SoftwareTransformCommon.cpp | 50 ++------- GPU/Common/VertexDecoderArm.cpp | 2 +- GPU/Common/VertexDecoderArm64.cpp | 2 +- GPU/Common/VertexDecoderX86.cpp | 2 +- GPU/D3D11/DrawEngineD3D11.cpp | 2 +- GPU/D3D11/GPU_D3D11.cpp | 2 - GPU/D3D11/ShaderManagerD3D11.cpp | 15 +-- GPU/D3D11/ShaderManagerD3D11.h | 5 - GPU/Directx9/DrawEngineDX9.cpp | 2 +- GPU/Directx9/GPU_DX9.cpp | 4 - GPU/Directx9/ShaderManagerDX9.cpp | 34 +----- GPU/Directx9/VertexShaderGeneratorDX9.cpp | 101 +---------------- GPU/Directx9/VertexShaderGeneratorDX9.h | 53 ++++----- GPU/GLES/DrawEngineGLES.cpp | 4 +- GPU/GLES/GPU_GLES.cpp | 4 - GPU/GLES/ShaderManagerGLES.cpp | 22 +--- GPU/GLES/ShaderManagerGLES.h | 7 -- GPU/GLES/VertexShaderGeneratorGLES.cpp | 125 +-------------------- GPU/GLES/VertexShaderGeneratorGLES.h | 2 - GPU/GPUCommon.cpp | 86 ++------------ GPU/GPUCommon.h | 3 - GPU/GPUState.cpp | 7 -- GPU/GPUState.h | 2 - GPU/Software/SoftGpu.cpp | 8 -- GPU/Software/TransformUnit.cpp | 21 ---- GPU/Vulkan/DrawEngineVulkan.cpp | 55 ++++----- GPU/Vulkan/DrawEngineVulkan.h | 9 +- GPU/Vulkan/GPU_Vulkan.cpp | 2 - GPU/Vulkan/PipelineManagerVulkan.cpp | 3 - GPU/Vulkan/PipelineManagerVulkan.h | 3 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 4 - GPU/Vulkan/ShaderManagerVulkan.h | 9 -- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 52 +-------- UI/GameSettingsScreen.cpp | 7 -- ext/native/thin3d/VulkanQueueRunner.h | 2 +- headless/Headless.cpp | 1 - unittest/TestVertexJit.cpp | 3 - 45 files changed, 83 insertions(+), 747 
deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 8fae1b0f6..f89d75f73 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -501,7 +501,6 @@ static ConfigSetting graphicsSettings[] = { ReportedConfigSetting("RenderingMode", &g_Config.iRenderingMode, &DefaultRenderingMode, true, true), ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true), ReportedConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, true, true), - ReportedConfigSetting("SoftwareSkinning", &g_Config.bSoftwareSkinning, true, true, true), ReportedConfigSetting("TextureFiltering", &g_Config.iTexFiltering, 1, true, true), ReportedConfigSetting("BufferFiltering", &g_Config.iBufFilter, 1, true, true), ReportedConfigSetting("InternalResolution", &g_Config.iInternalResolution, &DefaultInternalResolution, true, true), diff --git a/Core/Config.h b/Core/Config.h index 78af006d0..43751c9a7 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -154,7 +154,6 @@ public: int iGPUBackend; bool bSoftwareRendering; bool bHardwareTransform; // only used in the GLES backend - bool bSoftwareSkinning; // may speed up some games int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index a3746d087..472056893 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -379,13 +379,9 @@ bool DrawEngineCommon::GetCurrentSimpleVertices(int count, std::vectorDecodeVerts(bufPtr, inPtr, lowerBound, upperBound); - // OK, morphing eliminated but bones still remain to be taken care of. - // Let's do a partial software transform where we only do skinning. 
- VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType); SimpleVertex *sverts = (SimpleVertex *)outPtr; @@ -397,56 +393,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, (u8)gstate.getMaterialAmbientA(), }; - // Let's have two separate loops, one for non skinning and one for skinning. - if (!g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) { - int numBoneWeights = vertTypeGetNumBoneWeights(vertType); - for (int i = lowerBound; i <= upperBound; i++) { - reader.Goto(i - lowerBound); - SimpleVertex &sv = sverts[i]; - if (vertType & GE_VTYPE_TC_MASK) { - reader.ReadUV(sv.uv); - } - - if (vertType & GE_VTYPE_COL_MASK) { - reader.ReadColor0_8888(sv.color); - } else { - memcpy(sv.color, defaultColor, 4); - } - - float nrm[3], pos[3]; - float bnrm[3], bpos[3]; - - if (vertType & GE_VTYPE_NRM_MASK) { - // Normals are generated during tessellation anyway, not sure if any need to supply - reader.ReadNrm(nrm); - } else { - nrm[0] = 0; - nrm[1] = 0; - nrm[2] = 1.0f; - } - reader.ReadPos(pos); - - // Apply skinning transform directly - float weights[8]; - reader.ReadWeights(weights); - // Skinning - Vec3Packedf psum(0, 0, 0); - Vec3Packedf nsum(0, 0, 0); - for (int w = 0; w < numBoneWeights; w++) { - if (weights[w] != 0.0f) { - Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12); - Vec3Packedf tpos(bpos); - psum += tpos * weights[w]; - - Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12); - Vec3Packedf tnorm(bnrm); - nsum += tnorm * weights[w]; - } - } - sv.pos = psum; - sv.nrm = nsum; - } - } else { + { for (int i = lowerBound; i <= upperBound; i++) { reader.Goto(i - lowerBound); SimpleVertex &sv = sverts[i]; @@ -711,7 +658,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, numDrawCalls++; vertexCountInDrawCalls_ += vertexCount; - if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { + if (vertType & GE_VTYPE_WEIGHT_MASK) { 
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); decodeCounter_++; } diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 6e4bfa8c8..8de756237 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -76,14 +76,8 @@ enum : uint64_t { DIRTY_WORLDMATRIX = 1ULL << 21, DIRTY_VIEWMATRIX = 1ULL << 22, DIRTY_TEXMATRIX = 1ULL << 23, - DIRTY_BONEMATRIX0 = 1ULL << 24, // NOTE: These must be under 32 - DIRTY_BONEMATRIX1 = 1ULL << 25, - DIRTY_BONEMATRIX2 = 1ULL << 26, - DIRTY_BONEMATRIX3 = 1ULL << 27, - DIRTY_BONEMATRIX4 = 1ULL << 28, - DIRTY_BONEMATRIX5 = 1ULL << 29, - DIRTY_BONEMATRIX6 = 1ULL << 30, - DIRTY_BONEMATRIX7 = 1ULL << 31, + + // 8 free bits here where bones used to be! // These are for hardware tessellation DIRTY_BEZIERSPLINE = 1ULL << 32, @@ -91,8 +85,6 @@ enum : uint64_t { // space for 7 more uniforms. - DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 3a8f0de37..8a1667f18 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -33,7 +33,6 @@ std::string VertexShaderDesc(const ShaderID &id) { int ls1 = id.Bits(VS_BIT_LS1, 2); if (uvgMode) desc << uvgModes[uvgMode]; - if (id.Bit(VS_BIT_ENABLE_BONES)) desc << "Bones:" << (id.Bits(VS_BIT_BONES, 3) + 1) << " "; // Lights if (id.Bit(VS_BIT_LIGHTING_ENABLE)) { desc << "Light: "; @@ -103,16 +102,6 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform) id.SetBits(VS_BIT_LS1, 2, gstate.getUVLS1()); } - // Bones. - bool enableBones = vertTypeIsSkinningEnabled(vertType); - id.SetBit(VS_BIT_ENABLE_BONES, enableBones); - if (enableBones) { - id.SetBits(VS_BIT_BONES, 3, TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1); - // 2 bits. We should probably send in the weight scalefactor as a uniform instead, - // or simply preconvert all weights to floats. 
- id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, (vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT); - } - // Okay, d[1] coming up. ============== if (gstate.isLightingEnabled() || doShadeMapping) { // doShadeMapping is stored as UVGenMode, so this is enough for isLightingEnabled. diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index afc7c5131..8ef501a57 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -7,7 +7,6 @@ // TODO: There will be additional bits, indicating that groups of these will be // sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach") -// This is probably only really worth doing for lighting and bones. enum { VS_BIT_LMODE = 0, VS_BIT_IS_THROUGH = 1, @@ -29,10 +28,7 @@ enum { VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0 VS_BIT_LS0 = 18, // 2 VS_BIT_LS1 = 20, // 2 - VS_BIT_BONES = 22, // 3 should be enough, not 8 - // 25 - 29 are free. - VS_BIT_ENABLE_BONES = 30, - // 31 is free. + // 22 - 31 are free. 
VS_BIT_LIGHT0_COMP = 32, // 2 bits VS_BIT_LIGHT0_TYPE = 34, // 2 bits VS_BIT_LIGHT1_COMP = 36, // 2 bits diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index a05757496..302c93067 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -246,11 +246,3 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { } } } - -void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) { - for (int i = 0; i < 8; i++) { - if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { - ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i); - } - } -} diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 1dc506518..82bc27a33 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -164,22 +164,5 @@ R"( float4 u_ambient; float3 u_lightspecular3; )"; -// With some cleverness, we could get away with uploading just half this when only the four or five first -// bones are being used. This is 512b, 256b would be great. -struct UB_VS_Bones { - float bones[8][12]; -}; - -static const char *ub_vs_bonesStr = -R"( mat3x4 m[8]; -)"; - -// HLSL code is shared so these names are changed to match those in DX9. 
-static const char *cb_vs_bonesStr = -R"( float4x3 u_bone[8]; -)"; - void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport); void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms); -void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms); - diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index a29c987c1..5ebf13570 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -144,8 +144,6 @@ void SoftwareTransform( vscale /= gstate_c.curTextureHeight; } - bool skinningEnabled = vertTypeIsSkinningEnabled(vertType); - const int w = gstate.getTextureWidth(0); const int h = gstate.getTextureHeight(0); float widthFactor = (float) w / (float) gstate_c.curTextureWidth; @@ -213,48 +211,14 @@ void SoftwareTransform( Vec3f worldnormal(0, 0, 1); reader.ReadPos(pos); - if (!skinningEnabled) { - Vec3ByMatrix43(out, pos, gstate.worldMatrix); - if (reader.hasNormal()) { - reader.ReadNrm(normal.AsArray()); - if (gstate.areNormalsReversed()) { - normal = -normal; - } - Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); - worldnormal = worldnormal.Normalized(); - } - } else { - float weights[8]; - reader.ReadWeights(weights); - if (reader.hasNormal()) - reader.ReadNrm(normal.AsArray()); - - // Skinning - Vec3f psum(0, 0, 0); - Vec3f nsum(0, 0, 0); - for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { - if (weights[i] != 0.0f) { - Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); - Vec3f tpos(out); - psum += tpos * weights[i]; - if (reader.hasNormal()) { - Vec3f norm; - Norm3ByMatrix43(norm.AsArray(), normal.AsArray(), gstate.boneMatrix+i*12); - nsum += norm * weights[i]; - } - } - } - - // Yes, we really must multiply by the world matrix too. 
- Vec3ByMatrix43(out, psum.AsArray(), gstate.worldMatrix); - if (reader.hasNormal()) { - normal = nsum; - if (gstate.areNormalsReversed()) { - normal = -normal; - } - Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); - worldnormal = worldnormal.Normalized(); + Vec3ByMatrix43(out, pos, gstate.worldMatrix); + if (reader.hasNormal()) { + reader.ReadNrm(normal.AsArray()); + if (gstate.areNormalsReversed()) { + normal = -normal; } + Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); + worldnormal = worldnormal.Normalized(); } // Perform lighting here if enabled. don't need to check through, it's checked above. diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp index 79e6584ae..c7cddcb4d 100644 --- a/GPU/Common/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -229,7 +229,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int // Add code to convert matrices to 4x4. // Later we might want to do this when the matrices are loaded instead. int boneCount = 0; - if (NEONSkinning && dec.weighttype && g_Config.bSoftwareSkinning && dec.morphcount == 1) { + if (NEONSkinning && dec.weighttype) { // Copying from R3 to R4 MOVP2R(R3, gstate.boneMatrix); MOVP2R(R4, bones); diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index acfa230ea..3c87a6b67 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -193,7 +193,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int // Add code to convert matrices to 4x4. // Later we might want to do this when the matrices are loaded instead. 
int boneCount = 0; - if (dec.weighttype && g_Config.bSoftwareSkinning && dec.morphcount == 1) { + if (dec.weighttype) { // Copying from R3 to R4 MOVP2R(X3, gstate.boneMatrix); MOVP2R(X4, bones); diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index 70c5131e8..388be1e2d 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -195,7 +195,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int // Add code to convert matrices to 4x4. // Later we might want to do this when the matrices are loaded instead. int boneCount = 0; - if (dec.weighttype && g_Config.bSoftwareSkinning && dec.morphcount == 1) { + if (dec.weighttype) { MOV(PTRBITS, R(tempReg1), ImmPtr(&threeMasks)); MOVAPS(XMM4, MatR(tempReg1)); MOV(PTRBITS, R(tempReg1), ImmPtr(&aOne)); diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 2b49ae2d6..55006baca 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -344,7 +344,7 @@ void DrawEngineD3D11::DoFlush() { // Cannot cache vertex data with morph enabled. bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); // Also avoid caching when software skinning. - if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) useCache = false; if (useCache) { diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 8ad9ebd11..2b5d76030 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -103,7 +103,6 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw) // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. 
- UpdateCmdInfo(); CheckGPUFeatures(); BuildReportingInfo(); @@ -215,7 +214,6 @@ void GPU_D3D11::InitClear() { void GPU_D3D11::BeginHostFrame() { GPUCommon::BeginHostFrame(); - UpdateCmdInfo(); if (resized_) { CheckGPUFeatures(); framebufferManager_->Resized(); diff --git a/GPU/D3D11/ShaderManagerD3D11.cpp b/GPU/D3D11/ShaderManagerD3D11.cpp index 75f8d9535..3efb79ace 100644 --- a/GPU/D3D11/ShaderManagerD3D11.cpp +++ b/GPU/D3D11/ShaderManagerD3D11.cpp @@ -93,24 +93,19 @@ ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext codeBuffer_ = new char[16384]; memset(&ub_base, 0, sizeof(ub_base)); memset(&ub_lights, 0, sizeof(ub_lights)); - memset(&ub_bones, 0, sizeof(ub_bones)); INFO_LOG(G3D, "sizeof(ub_base): %d", (int)sizeof(ub_base)); INFO_LOG(G3D, "sizeof(ub_lights): %d", (int)sizeof(ub_lights)); - INFO_LOG(G3D, "sizeof(ub_bones): %d", (int)sizeof(ub_bones)); D3D11_BUFFER_DESC desc{sizeof(ub_base), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE }; ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_base)); desc.ByteWidth = sizeof(ub_lights); ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_lights)); - desc.ByteWidth = sizeof(ub_bones); - ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_bones)); } ShaderManagerD3D11::~ShaderManagerD3D11() { push_base->Release(); push_lights->Release(); - push_bones->Release(); ClearShaders(); delete[] codeBuffer_; } @@ -159,21 +154,15 @@ uint64_t ShaderManagerD3D11::UpdateUniforms() { memcpy(map.pData, &ub_lights, sizeof(ub_lights)); context_->Unmap(push_lights, 0); } - if (dirty & DIRTY_BONE_UNIFORMS) { - BoneUpdateUniforms(&ub_bones, dirty); - context_->Map(push_bones, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - memcpy(map.pData, &ub_bones, sizeof(ub_bones)); - context_->Unmap(push_bones, 0); - } } gstate_c.CleanUniforms(); return dirty; } void ShaderManagerD3D11::BindUniforms() { - ID3D11Buffer *vs_cbs[3] = { push_base, push_lights, push_bones }; + 
ID3D11Buffer *vs_cbs[2] = { push_base, push_lights }; ID3D11Buffer *ps_cbs[1] = { push_base }; - context_->VSSetConstantBuffers(0, 3, vs_cbs); + context_->VSSetConstantBuffers(0, 2, vs_cbs); context_->PSSetConstantBuffers(0, 1, ps_cbs); } diff --git a/GPU/D3D11/ShaderManagerD3D11.h b/GPU/D3D11/ShaderManagerD3D11.h index 3d66c9dfb..1ca3db44b 100644 --- a/GPU/D3D11/ShaderManagerD3D11.h +++ b/GPU/D3D11/ShaderManagerD3D11.h @@ -61,9 +61,6 @@ public: const std::vector &bytecode() const { return bytecode_; } bool Failed() const { return failed_; } bool UseHWTransform() const { return useHWTransform_; } - bool HasBones() const { - return id_.Bit(VS_BIT_ENABLE_BONES); - } bool HasLights() const { return usesLighting_; } @@ -134,12 +131,10 @@ private: // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. UB_VS_FS_Base ub_base; UB_VS_Lights ub_lights; - UB_VS_Bones ub_bones; // Not actual pushbuffers, requires D3D11.1, let's try to live without that first. ID3D11Buffer *push_base; ID3D11Buffer *push_lights; - ID3D11Buffer *push_bones; D3D11FragmentShader *lastFShader_; D3D11VertexShader *lastVShader_; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index c72d25672..9330893a9 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -325,7 +325,7 @@ void DrawEngineDX9::DoFlush() { // Cannot cache vertex data with morph enabled. bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); // Also avoid caching when software skinning. 
- if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) useCache = false; if (useCache) { diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index adb50ecbf..52f1a6e2d 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -83,9 +83,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } - // No need to flush before the tex scale/offset commands if we are baking - // the tex scale/offset into the vertices anyway. - UpdateCmdInfo(); CheckGPUFeatures(); BuildReportingInfo(); @@ -191,7 +188,6 @@ void GPU_DX9::InitClear() { void GPU_DX9::BeginHostFrame() { GPUCommon::BeginHostFrame(); - UpdateCmdInfo(); if (resized_) { CheckGPUFeatures(); framebufferManager_->Resized(); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 2f5fdad0c..291446017 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -313,7 +313,7 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { } const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | -DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | +DIRTY_FOGCOEF | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3; void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { @@ -382,38 +382,6 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { #endif VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } - // TODO: Could even set all bones in one go if they're all dirty. 
-#ifdef USE_BONE_ARRAY - if (u_bone != 0) { - float allBones[8 * 16]; - - bool allDirty = true; - for (int i = 0; i < numBones; i++) { - if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { - ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i); - } else { - allDirty = false; - } - } - if (allDirty) { - // Set them all with one call - //glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); - } else { - // Set them one by one. Could try to coalesce two in a row etc but too lazy. - for (int i = 0; i < numBones; i++) { - if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { - //glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); - } - } - } - } -#else - for (int i = 0; i < 8; i++) { - if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { - VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i); - } - } -#endif // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index 5158fb423..8c3714440 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -37,19 +37,7 @@ namespace DX9 { -static const char * const boneWeightAttrDecl[9] = { - "#ERROR#", - "float a_w1:TEXCOORD1;\n", - "float2 a_w1:TEXCOORD1;\n", - "float3 a_w1:TEXCOORD1;\n", - "float4 a_w1:TEXCOORD1;\n", - "float4 a_w1:TEXCOORD1;\n float a_w2:TEXCOORD2;\n", - "float4 a_w1:TEXCOORD1;\n float2 a_w2:TEXCOORD2;\n", - "float4 a_w1:TEXCOORD1;\n float3 a_w2:TEXCOORD2;\n", - "float4 a_w1:TEXCOORD1;\n float4 a_w2:TEXCOORD2;\n", -}; - -enum DoLightComputation { + enum DoLightComputation { LIGHT_OFF, LIGHT_SHADE, LIGHT_FULL, @@ -80,7 +68,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE); int ls0 = id.Bits(VS_BIT_LS0, 2); int ls1 = id.Bits(VS_BIT_LS1, 2); - bool enableBones = id.Bit(VS_BIT_ENABLE_BONES); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate 
= id.Bits(VS_BIT_MATERIAL_UPDATE, 3); @@ -104,9 +91,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); - if (enableBones) { - numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); - } if (lang == HLSL_DX9) { WRITE(p, "#pragma warning( disable : 3571 )\n"); @@ -129,15 +113,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage WRITE(p, "float4x3 u_view : register(c%i);\n", CONST_VS_VIEW); if (doTextureTransform) WRITE(p, "float4x3 u_tex : register(c%i);\n", CONST_VS_TEXMTX); - if (enableBones) { -#ifdef USE_BONE_ARRAY - WRITE(p, "float4x3 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0); -#else - for (int i = 0; i < numBoneWeights; i++) { - WRITE(p, "float4x3 u_bone%i : register(c%i);\n", i, CONST_VS_BONE0 + i * 3); - } -#endif - } if (doTexture) { WRITE(p, "float4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET); } @@ -182,7 +157,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage } else { WRITE(p, "cbuffer base : register(b0) {\n%s};\n", cb_baseStr); WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", cb_vs_lightsStr); - WRITE(p, "cbuffer bones : register(b2) {\n%s};\n", cb_vs_bonesStr); } // And the "varyings". @@ -192,9 +166,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage if ((doSpline || doBezier) && lang == HLSL_D3D11) { WRITE(p, " uint instanceId : SV_InstanceID;\n"); } - if (enableBones) { - WRITE(p, " %s", boneWeightAttrDecl[numBoneWeights]); - } if (doTexture && hasTexcoord) { WRITE(p, " float2 texcoord : TEXCOORD0;\n"); } @@ -388,7 +359,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage } } else { // Step 1: World Transform / Skinning - if (!enableBones) { + if (true) { // Hardware tessellation if (doSpline || doBezier) { WRITE(p, " uint num_patches_u = %s;\n", doBezier ? 
"(u_spline_count_u - 1) / 3u" : "u_spline_count_u - 3"); @@ -497,74 +468,6 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage else WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n"); } - } else { - static const char * const boneWeightAttr[8] = { - "a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w", - "a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w", - }; - -#if defined(USE_FOR_LOOP) && defined(USE_BONE_ARRAY) - - // To loop through the weights, we unfortunately need to put them in a float array. - // GLSL ES sucks - no way to directly initialize an array! - switch (numBoneWeights) { - case 1: WRITE(p, " float w[1]; w[0] = a_w1;\n"); break; - case 2: WRITE(p, " float w[2]; w[0] = a_w1.x; w[1] = a_w1.y;\n"); break; - case 3: WRITE(p, " float w[3]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z;\n"); break; - case 4: WRITE(p, " float w[4]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w;\n"); break; - case 5: WRITE(p, " float w[5]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2;\n"); break; - case 6: WRITE(p, " float w[6]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y;\n"); break; - case 7: WRITE(p, " float w[7]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z;\n"); break; - case 8: WRITE(p, " float w[8]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z; w[7] = a_w2.w;\n"); break; - } - - WRITE(p, " mat4 skinMatrix = w[0] * u_bone[0];\n"); - if (numBoneWeights > 1) { - WRITE(p, " for (int i = 1; i < %i; i++) {\n", numBoneWeights); - WRITE(p, " skinMatrix += w[i] * u_bone[i];\n"); - WRITE(p, " }\n"); - } - -#else - if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { - if (numBoneWeights == 1) - WRITE(p, " float4x3 skinMatrix = mul(In.a_w1, u_bone[0])"); - else - WRITE(p, " float4x3 skinMatrix = mul(In.a_w1.x, u_bone[0])"); - for (int i = 1; i < numBoneWeights; i++) 
{ - const char *weightAttr = boneWeightAttr[i]; - // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "a_w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "a_w2"; - WRITE(p, " + mul(In.%s, u_bone[%i])", weightAttr, i); - } - } else { - if (numBoneWeights == 1) - WRITE(p, " float4x3 skinMatrix = mul(In.a_w1, u_bone0)"); - else - WRITE(p, " float4x3 skinMatrix = mul(In.a_w1.x, u_bone0)"); - for (int i = 1; i < numBoneWeights; i++) { - const char *weightAttr = boneWeightAttr[i]; - // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "a_w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "a_w2"; - WRITE(p, " + mul(In.%s, u_bone%i)", weightAttr, i); - } - } -#endif - - WRITE(p, ";\n"); - - // Trying to simplify this results in bugs in LBP... - WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix);\n"); - WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world);\n"); - - if (hasNormal) { - WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix);\n", flipNormal ? "-" : ""); - } else { - WRITE(p, " float3 skinnednormal = mul(float4(0.0, 0.0, %s1.0, 0.0), skinMatrix);\n", flipNormal ? 
"-" : ""); - } - WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world));\n"); } WRITE(p, " float4 viewPos = float4(mul(float4(worldpos, 1.0), u_view), 1.0);\n"); diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.h b/GPU/Directx9/VertexShaderGeneratorDX9.h index a48c110d3..585fec012 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.h +++ b/GPU/Directx9/VertexShaderGeneratorDX9.h @@ -25,35 +25,28 @@ namespace DX9 { void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage lang = HLSL_DX9); -#define CONST_VS_PROJ 0 -#define CONST_VS_PROJ_THROUGH 4 -#define CONST_VS_VIEW 8 -#define CONST_VS_WORLD 11 -#define CONST_VS_TEXMTX 14 -#define CONST_VS_UVSCALEOFFSET 17 -#define CONST_VS_FOGCOEF 18 -#define CONST_VS_AMBIENT 19 -#define CONST_VS_BONE0 20 -#define CONST_VS_BONE1 23 -#define CONST_VS_BONE2 26 -#define CONST_VS_BONE3 29 -#define CONST_VS_BONE4 32 -#define CONST_VS_BONE5 35 -#define CONST_VS_BONE6 38 -#define CONST_VS_BONE7 41 -#define CONST_VS_BONE8 44 -#define CONST_VS_MATAMBIENTALPHA 47 -#define CONST_VS_MATDIFFUSE 48 -#define CONST_VS_MATSPECULAR 49 -#define CONST_VS_MATEMISSIVE 50 -#define CONST_VS_LIGHTPOS 51 -#define CONST_VS_LIGHTDIR 55 -#define CONST_VS_LIGHTATT 59 -#define CONST_VS_LIGHTANGLE 63 -#define CONST_VS_LIGHTSPOTCOEF 67 -#define CONST_VS_LIGHTDIFFUSE 71 -#define CONST_VS_LIGHTSPECULAR 75 -#define CONST_VS_LIGHTAMBIENT 79 -#define CONST_VS_DEPTHRANGE 83 + enum { + CONST_VS_PROJ = 0, + CONST_VS_PROJ_THROUGH = 4, + CONST_VS_VIEW = 8, + CONST_VS_WORLD = 11, + CONST_VS_TEXMTX = 14, + CONST_VS_UVSCALEOFFSET = 17, + CONST_VS_FOGCOEF = 18, + CONST_VS_AMBIENT = 19, + CONST_VS_MATAMBIENTALPHA = 20, + CONST_VS_MATDIFFUSE = 21, + CONST_VS_MATSPECULAR = 22, + CONST_VS_MATEMISSIVE = 23, + CONST_VS_LIGHTPOS = 24, + CONST_VS_LIGHTDIR = 28, + CONST_VS_LIGHTATT = 32, + CONST_VS_LIGHTANGLE = 36, + CONST_VS_LIGHTSPOTCOEF = 40, + CONST_VS_LIGHTDIFFUSE = 44, + CONST_VS_LIGHTSPECULAR = 48, + CONST_VS_LIGHTAMBIENT = 
52, + CONST_VS_DEPTHRANGE = 56, + }; }; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 83b56e5ca..a02fe9f3c 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -377,7 +377,7 @@ void DrawEngineGLES::DoFlush() { // Cannot cache vertex data with morph enabled. bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); // Also avoid caching when software skinning. - if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) useCache = false; // TEMPORARY @@ -521,7 +521,7 @@ void DrawEngineGLES::DoFlush() { vai->lastFrame = gpuStats.numFlips; } else { - if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) { // If software skinning, we've already predecoded into "decoded". So push that content. size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index ea65a9fae..5b2e6445a 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -89,8 +89,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw) // No need to flush before the tex scale/offset commands if we are baking // the tex scale/offset into the vertices anyway. - UpdateCmdInfo(); - BuildReportingInfo(); // Update again after init to be sure of any silly driver problems. 
UpdateVsyncInterval(true); @@ -346,7 +344,6 @@ void GPU_GLES::DeviceRestore() { draw_ = (Draw::DrawContext *)PSP_CoreParameter().graphicsContext->GetDrawContext(); ILOG("GPU_GLES: DeviceRestore"); - UpdateCmdInfo(); UpdateVsyncInterval(true); textureCacheGL_->DeviceRestore(draw_); @@ -366,7 +363,6 @@ void GPU_GLES::InitClear() { void GPU_GLES::BeginHostFrame() { GPUCommon::BeginHostFrame(); - UpdateCmdInfo(); if (resized_) { CheckGPUFeatures(); framebufferManager_->Resized(); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index bb06521ea..d1e6afb5d 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -108,21 +108,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_world, "u_world" }); queries.push_back({ &u_texmtx, "u_texmtx" }); - if (VSID.Bit(VS_BIT_ENABLE_BONES)) - numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1); - else - numBones = 0; queries.push_back({ &u_depthRange, "u_depthRange" }); -#ifdef USE_BONE_ARRAY - queries.push_back({ &u_bone, "u_bone" }); -#else - static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", }; - for (int i = 0; i < 8; i++) { - queries.push_back({ &u_bone[i], boneNames[i] }); - } -#endif - // Lighting, texturing queries.push_back({ &u_ambient, "u_ambient" }); queries.push_back({ &u_matambientalpha, "u_matambientalpha" }); @@ -475,13 +462,6 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f); render_->SetUniformF(&u_stencilReplaceValue, 1, &f); } - float bonetemp[16]; - for (int i = 0; i < numBones; i++) { - if (dirty & (DIRTY_BONEMATRIX0 << i)) { - ConvertMatrix4x3To4x4(bonetemp, gstate.boneMatrix + 12 * i); - render_->SetUniformM4x4(&u_bone[i], bonetemp); - } - } if (dirty & DIRTY_SHADERBLEND) { if (u_blendFixA != -1) { @@ -805,7 +785,7 @@ 
std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderT // as sometimes these features might have an effect on the ID bits. #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 5 +#define CACHE_VERSION 6 struct CacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 77b94f9cd..981b63a47 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -72,13 +72,6 @@ public: int u_world; int u_depthRange; // x,y = viewport xscale/xcenter. z,w=clipping minz/maxz (?) -#ifdef USE_BONE_ARRAY - int u_bone; // array, size is numBones -#else - int u_bone[8]; -#endif - int numBones; - // Shader blending. int u_fbotex; int u_blendFixA; diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index f9de2d312..239134b46 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -38,30 +38,6 @@ #define WRITE p+=sprintf -static const char * const boneWeightAttrDecl[9] = { - "#ERROR#", - "attribute mediump float w1;\n", - "attribute mediump vec2 w1;\n", - "attribute mediump vec3 w1;\n", - "attribute mediump vec4 w1;\n", - "attribute mediump vec4 w1;\nattribute mediump float w2;\n", - "attribute mediump vec4 w1;\nattribute mediump vec2 w2;\n", - "attribute mediump vec4 w1;\nattribute mediump vec3 w2;\n", - "attribute mediump vec4 w1, w2;\n", -}; - -static const char * const boneWeightInDecl[9] = { - "#ERROR#", - "in mediump float w1;\n", - "in mediump vec2 w1;\n", - "in mediump vec3 w1;\n", - "in mediump vec4 w1;\n", - "in mediump vec4 w1;\nin mediump float w2;\n", - "in mediump vec4 w1;\nin mediump vec2 w2;\n", - "in mediump vec4 w1;\nin mediump vec3 w2;\n", - "in mediump vec4 w1, w2;\n", -}; - enum DoLightComputation { LIGHT_OFF, LIGHT_SHADE, @@ -105,7 +81,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, bool glslES30 = false; const char *varying = 
"varying"; const char *attribute = "attribute"; - const char * const * boneWeightDecl = boneWeightAttrDecl; const char *texelFetch = NULL; bool highpFog = false; bool highpTexcoord = false; @@ -158,7 +133,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, if (glslES30 || gl_extensions.IsCoreContext) { attribute = "in"; varying = "out"; - boneWeightDecl = boneWeightInDecl; } bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH); @@ -182,7 +156,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE); int ls0 = id.Bits(VS_BIT_LS0, 2); int ls1 = id.Bits(VS_BIT_LS1, 2); - bool enableBones = id.Bit(VS_BIT_ENABLE_BONES); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); @@ -208,16 +181,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, } } - int numBoneWeights = 0; - int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); - if (enableBones) { - numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); - WRITE(p, "%s", boneWeightDecl[numBoneWeights]); - *attrMask |= 1 << ATTR_W1; - if (numBoneWeights >= 5) - *attrMask |= 1 << ATTR_W2; - } - if (useHWTransform) WRITE(p, "%s vec3 position;\n", attribute); else @@ -268,17 +231,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, WRITE(p, "uniform mediump mat4 u_texmtx;\n"); *uniformMask |= DIRTY_TEXMATRIX; } - if (enableBones) { -#ifdef USE_BONE_ARRAY - WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBoneWeights); - *uniformMask |= DIRTY_BONE_UNIFORMS; -#else - for (int i = 0; i < numBoneWeights; i++) { - WRITE(p, "uniform mat4 u_bone%i;\n", i); - *uniformMask |= DIRTY_BONEMATRIX0 << i; - } -#endif - } if (doTexture) { WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); *uniformMask |= DIRTY_UVSCALEOFFSET; @@ -485,7 +437,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, } } else { 
// Step 1: World Transform / Skinning - if (!enableBones) { + if (true) { // Hardware tessellation if (doBezier || doSpline) { WRITE(p, " vec3 _pos[16];\n"); @@ -592,81 +544,6 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } - } else { - static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; - const char *factor = rescale[boneWeightScale]; - - static const char * const boneWeightAttr[8] = { - "w1.x", "w1.y", "w1.z", "w1.w", - "w2.x", "w2.y", "w2.z", "w2.w", - }; - -#if defined(USE_FOR_LOOP) && defined(USE_BONE_ARRAY) - - // To loop through the weights, we unfortunately need to put them in a float array. - // GLSL ES sucks - no way to directly initialize an array! - switch (numBoneWeights) { - case 1: WRITE(p, " float w[1]; w[0] = w1;\n"); break; - case 2: WRITE(p, " float w[2]; w[0] = w1.x; w[1] = w1.y;\n"); break; - case 3: WRITE(p, " float w[3]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z;\n"); break; - case 4: WRITE(p, " float w[4]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w;\n"); break; - case 5: WRITE(p, " float w[5]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2;\n"); break; - case 6: WRITE(p, " float w[6]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y;\n"); break; - case 7: WRITE(p, " float w[7]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y; w[6] = w2.z;\n"); break; - case 8: WRITE(p, " float w[8]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y; w[6] = w2.z; w[7] = w2.w;\n"); break; - } - - WRITE(p, " mat4 skinMatrix = w[0] * u_bone[0];\n"); - if (numBoneWeights > 1) { - WRITE(p, " for (int i = 1; i < %i; i++) {\n", numBoneWeights); - WRITE(p, " skinMatrix += w[i] * u_bone[i];\n"); - WRITE(p, " }\n"); - } - -#else - -#ifdef USE_BONE_ARRAY - if (numBoneWeights == 1) - 
WRITE(p, " mat4 skinMatrix = w1 * u_bone[0]"); - else - WRITE(p, " mat4 skinMatrix = w1.x * u_bone[0]"); - for (int i = 1; i < numBoneWeights; i++) { - const char *weightAttr = boneWeightAttr[i]; - // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; - WRITE(p, " + %s * u_bone[%i]", weightAttr, i); - } -#else - // Uncomment this to screw up bone shaders to check the vertex shader software fallback - // WRITE(p, "THIS SHOULD ERROR! #error"); - if (numBoneWeights == 1) - WRITE(p, " mat4 skinMatrix = w1 * u_bone0"); - else - WRITE(p, " mat4 skinMatrix = w1.x * u_bone0"); - for (int i = 1; i < numBoneWeights; i++) { - const char *weightAttr = boneWeightAttr[i]; - // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; - WRITE(p, " + %s * u_bone%i", weightAttr, i); - } -#endif - -#endif - - WRITE(p, ";\n"); - - // Trying to simplify this results in bugs in LBP... - WRITE(p, " vec3 skinnedpos = (skinMatrix * vec4(position, 1.0)).xyz %s;\n", factor); - WRITE(p, " vec3 worldpos = (u_world * vec4(skinnedpos, 1.0)).xyz;\n"); - - if (hasNormal) { - WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(%snormal, 0.0)).xyz %s;\n", flipNormal ? "-" : "", factor); - } else { - WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(0.0, 0.0, %s1.0, 0.0)).xyz %s;\n", flipNormal ? 
"-" : "", factor); - } - WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(skinnednormal, 0.0)).xyz);\n"); } WRITE(p, " vec4 viewPos = u_view * vec4(worldpos, 1.0);\n"); diff --git a/GPU/GLES/VertexShaderGeneratorGLES.h b/GPU/GLES/VertexShaderGeneratorGLES.h index 7801b2cd6..ae78cad76 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.h +++ b/GPU/GLES/VertexShaderGeneratorGLES.h @@ -19,8 +19,6 @@ #include "Common/CommonTypes.h" -// #define USE_BONE_ARRAY - struct VShaderID; void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask, uint64_t *uniformMask); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 7640b7e79..2409b85c6 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -46,8 +46,8 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier }, { GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline }, - // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, + // Changing the vertex type does not always require us to flush so handle that in Execute_VertexType. 
+ { GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut }, @@ -403,23 +403,11 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i); } } - - UpdateCmdInfo(); } GPUCommon::~GPUCommon() { } -void GPUCommon::UpdateCmdInfo() { - if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning; - } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; - cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType; - } -} - void GPUCommon::BeginHostFrame() { ReapplyGfxState(); @@ -1425,22 +1413,12 @@ void GPUCommon::Execute_TexSize0(u32 op, u32 diff) { } } -void GPUCommon::Execute_VertexType(u32 op, u32 diff) { - if (diff) - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) { - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - if (diff & GE_VTYPE_THROUGH_MASK) - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE); - } -} - void GPUCommon::Execute_LoadClut(u32 op, u32 diff) { gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes()); } -void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { +void GPUCommon::Execute_VertexType(u32 op, u32 diff) { // Don't flush when weight count changes. 
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) { // Restore and flush @@ -1638,10 +1616,6 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - if (vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); SetDrawType(DRAW_BEZIER, PatchPrimToPrim(patchPrim)); @@ -1700,10 +1674,6 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - if (vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - int sp_ucount = op & 0xFF; int sp_vcount = (op >> 8) & 0xFF; int sp_utype = (op >> 16) & 0x3; @@ -1984,34 +1954,10 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) { } if (fastLoad) { - // If we can't use software skinning, we have to flush and dirty. 
- if (!g_Config.bSoftwareSkinning) { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - const u32 newVal = src[i] << 8; - if (dst[i] != newVal) { - Flush(); - dst[i] = newVal; - } - if (++i >= end) { - break; - } - } - - const unsigned int numPlusCount = (op & 0x7F) + i; - for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) { - gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12)); - } - } else { - while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { - dst[i] = src[i] << 8; - if (++i >= end) { - break; - } - } - - const unsigned int numPlusCount = (op & 0x7F) + i; - for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); + while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) { + dst[i] = src[i] << 8; + if (++i >= end) { + break; } } } @@ -2029,13 +1975,6 @@ void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) { int num = gstate.boneMatrixNumber & 0x7F; u32 newVal = op << 8; if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) { - // Bone matrices should NOT flush when software skinning is enabled! 
- if (!g_Config.bSoftwareSkinning) { - Flush(); - gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12)); - } else { - gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12); - } ((u32 *)gstate.boneMatrix)[num] = newVal; } num++; @@ -2175,17 +2114,6 @@ void GPUCommon::Execute_Unknown(u32 op, u32 diff) { void GPUCommon::FastLoadBoneMatrix(u32 target) { const int num = gstate.boneMatrixNumber & 0x7F; const int mtxNum = num / 12; - uint32_t uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum; - if ((num - 12 * mtxNum) != 0) { - uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7); - } - - if (!g_Config.bSoftwareSkinning) { - Flush(); - gstate_c.Dirty(uniformsToDirty); - } else { - gstate_c.deferredVertTypeDirty |= uniformsToDirty; - } gstate.FastLoadBoneMatrix(target); } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 5b6b78ce4..52bcec023 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -72,8 +72,6 @@ public: } virtual void CheckGPUFeatures() = 0; - void UpdateCmdInfo(); - bool IsReady() override { return true; } @@ -131,7 +129,6 @@ public: void Execute_End(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff); - void Execute_VertexTypeSkinning(u32 op, u32 diff); void Execute_Prim(u32 op, u32 diff); void Execute_Bezier(u32 op, u32 diff); diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 7e3f3812e..19ae21706 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -240,13 +240,6 @@ void GPUgstate::Restore(u32_le *ptr) { } } -bool vertTypeIsSkinningEnabled(u32 vertType) { - if (g_Config.bSoftwareSkinning) - return false; - else - return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); -} - struct GPUStateCache_v0 { u32 vertexAddr; u32 indexAddr; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 811f1fa62..aee7cfc57 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -441,8 +441,6 @@ struct GPUgstate { void Restore(u32_le *ptr); }; -bool vertTypeIsSkinningEnabled(u32 vertType); - inline int vertTypeGetNumBoneWeights(u32 vertType) { 
return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; } inline int vertTypeGetTexCoordMask(u32 vertType) { return vertType & GE_VTYPE_TC_MASK; } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index b49065982..39c13a0b7 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -395,10 +395,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); SetDrawType(DRAW_BEZIER, PatchPrimToPrim(patchPrim)); @@ -443,10 +439,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { indices = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) { - DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType)); - } - int sp_ucount = op & 0xFF; int sp_vcount = (op >> 8) & 0xFF; int sp_utype = (op >> 16) & 0x3; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 8f5505aa7..8a3bd0223 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -167,27 +167,6 @@ VertexData TransformUnit::ReadVertex(VertexReader& vreader) vertex.normal = -vertex.normal; } - if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) { - float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; - 
vreader.ReadWeights(W); - - Vec3 tmppos(0.f, 0.f, 0.f); - Vec3 tmpnrm(0.f, 0.f, 0.f); - - for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) { - Mat3x3 bone(&gstate.boneMatrix[12*i]); - tmppos += (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])) * W[i]; - if (vreader.hasNormal()) - tmpnrm += (bone * vertex.normal) * W[i]; - } - - pos[0] = tmppos.x; - pos[1] = tmppos.y; - pos[2] = tmppos.z; - if (vreader.hasNormal()) - vertex.normal = tmpnrm; - } - if (vreader.hasColor0()) { float col[4]; vreader.ReadColor0(col); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index faba817b1..c3acc8926 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -66,8 +66,7 @@ enum { DRAW_BINDING_2ND_TEXTURE = 1, DRAW_BINDING_DYNUBO_BASE = 2, DRAW_BINDING_DYNUBO_LIGHT = 3, - DRAW_BINDING_DYNUBO_BONE = 4, - DRAW_BINDING_TESS_STORAGE_BUF = 5, + DRAW_BINDING_TESS_STORAGE_BUF = 4, }; enum { @@ -95,7 +94,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra void DrawEngineVulkan::InitDeviceObjects() { // All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated. 
- VkDescriptorSetLayoutBinding bindings[6]{}; + VkDescriptorSetLayoutBinding bindings[5]{}; bindings[0].descriptorCount = 1; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; @@ -112,15 +111,11 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[3].binding = DRAW_BINDING_DYNUBO_LIGHT; - bindings[4].descriptorCount = 1; - bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[4].binding = DRAW_BINDING_DYNUBO_BONE; // Used only for hardware tessellation. - bindings[5].descriptorCount = 1; - bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[5].binding = DRAW_BINDING_TESS_STORAGE_BUF; + bindings[4].descriptorCount = 1; + bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[4].binding = DRAW_BINDING_TESS_STORAGE_BUF; VkDevice device = vulkan_->GetDevice(); @@ -134,7 +129,7 @@ void DrawEngineVulkan::InitDeviceObjects() { // if creating and updating them turns out to be expensive. for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { // We now create descriptor pools on demand, so removed from here. 
- frame_[i].pushUBO = new VulkanPushBuffer(vulkan_, 8 * 1024 * 1024); + frame_[i].pushUBO = new VulkanPushBuffer(vulkan_, 4 * 1024 * 1024); frame_[i].pushVertex = new VulkanPushBuffer(vulkan_, 2 * 1024 * 1024); frame_[i].pushIndex = new VulkanPushBuffer(vulkan_, 1 * 1024 * 1024); } @@ -364,7 +359,7 @@ VkResult DrawEngineVulkan::RecreateDescriptorPool(FrameData &frame, int newSize) frame.descPoolSize = newSize; VkDescriptorPoolSize dpTypes[3]; - dpTypes[0].descriptorCount = frame.descPoolSize * 3; + dpTypes[0].descriptorCount = frame.descPoolSize * 2; dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; dpTypes[1].descriptorCount = frame.descPoolSize * 2; // Don't use these for tess anymore, need max two per set. dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -382,17 +377,15 @@ VkResult DrawEngineVulkan::RecreateDescriptorPool(FrameData &frame, int newSize) return res; } -VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) { +VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess) { DescriptorSetKey key; key.imageView_ = imageView; key.sampler_ = sampler; key.secondaryImageView_ = boundSecondary_; key.base_ = base; key.light_ = light; - key.bone_ = bone; _dbg_assert_(G3D, base != VK_NULL_HANDLE); _dbg_assert_(G3D, light != VK_NULL_HANDLE); - _dbg_assert_(G3D, bone != VK_NULL_HANDLE); FrameData &frame = frame_[vulkan_->GetCurFrame()]; // See if we already have this descriptor set cached. 
@@ -494,7 +487,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView } // Uniform buffer objects - VkDescriptorBufferInfo buf[3]{}; + VkDescriptorBufferInfo buf[2]{}; int count = 0; buf[count].buffer = base; buf[count].offset = 0; @@ -504,10 +497,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView buf[count].offset = 0; buf[count].range = sizeof(UB_VS_Lights); count++; - buf[count].buffer = bone; - buf[count].offset = 0; - buf[count].range = sizeof(UB_VS_Bones); - count++; for (int i = 0; i < count; i++) { writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writes[n].pNext = nullptr; @@ -531,11 +520,9 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView void DrawEngineVulkan::DirtyAllUBOs() { baseUBOOffset = 0; lightUBOOffset = 0; - boneUBOOffset = 0; baseBuf = VK_NULL_HANDLE; lightBuf = VK_NULL_HANDLE; - boneBuf = VK_NULL_HANDLE; - dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS; + dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS; imageView = VK_NULL_HANDLE; sampler = VK_NULL_HANDLE; gstate_c.Dirty(DIRTY_TEXTURE_IMAGE); @@ -594,7 +581,7 @@ void DrawEngineVulkan::DoFlush() { // Also avoid caching when software skinning. VkBuffer vbuf = VK_NULL_HANDLE; VkBuffer ibuf = VK_NULL_HANDLE; - if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) { useCache = false; } @@ -736,7 +723,7 @@ void DrawEngineVulkan::DoFlush() { break; } } else { - if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { + if (lastVType_ & GE_VTYPE_WEIGHT_MASK) { // If software skinning, we've already predecoded into "decoded". So push that content. 
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride; u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf); @@ -808,12 +795,12 @@ void DrawEngineVulkan::DoFlush() { dirtyUniforms_ |= shaderManager_->UpdateUniforms(); UpdateUBOs(frame); - VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess); + VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess); { PROFILE_THIS_SCOPE("renderman_q"); - const uint32_t dynamicUBOOffsets[3] = { - baseUBOOffset, lightUBOOffset, boneUBOOffset, + const uint32_t dynamicUBOOffsets[2] = { + baseUBOOffset, lightUBOOffset, }; int stride = dec_->GetDecVtxFmt().stride; @@ -914,9 +901,9 @@ void DrawEngineVulkan::DoFlush() { // Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered UpdateUBOs(frame); - VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess); - const uint32_t dynamicUBOOffsets[3] = { - baseUBOOffset, lightUBOOffset, boneUBOOffset, + VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess); + const uint32_t dynamicUBOOffsets[2] = { + baseUBOOffset, lightUBOOffset, }; PROFILE_THIS_SCOPE("renderman_q"); @@ -983,10 +970,6 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) { lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf); dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS; } - if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) { - boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf); - dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS; - } } DrawEngineVulkan::TessellationDataTransferVulkan::TessellationDataTransferVulkan(VulkanContext *vulkan, Draw::DrawContext *draw) diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index d39186837..8b58dbb86 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -23,7 
+23,7 @@ // * binding 1: Secondary texture sampler for shader blending or depal palettes // * binding 2: Base Uniform Buffer (includes fragment state) // * binding 3: Light uniform buffer -// * binding 4: Bone uniform buffer +// * binding 4: Shader storage buffer for tessellation // // All shaders conform to this layout, so they are all compatible with the same descriptor set. // The format of the various uniform buffers may vary though - vertex shaders that don't skin @@ -195,7 +195,7 @@ private: void DoFlush(); void UpdateUBOs(FrameData *frame); - VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess); + VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess); VulkanContext *vulkan_; Draw::DrawContext *draw_; @@ -219,7 +219,7 @@ private: VkImageView imageView_; VkImageView secondaryImageView_; VkSampler sampler_; - VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical + VkBuffer base_, light_; // Both UBO slots will be set to this. This will usually be identical // for all draws in a frame, except when the buffer has to grow.
}; @@ -253,8 +253,7 @@ private: uint64_t dirtyUniforms_; uint32_t baseUBOOffset; uint32_t lightUBOOffset; - uint32_t boneUBOOffset; - VkBuffer baseBuf, lightBuf, boneBuf; + VkBuffer baseBuf, lightBuf; VkImageView imageView = VK_NULL_HANDLE; VkSampler sampler = VK_NULL_HANDLE; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 0aeefe849..f4b173177 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -190,7 +190,6 @@ void GPU_Vulkan::CheckGPUFeatures() { void GPU_Vulkan::BeginHostFrame() { drawEngine_.BeginFrame(); - UpdateCmdInfo(); if (resized_) { CheckGPUFeatures(); @@ -425,7 +424,6 @@ void GPU_Vulkan::DeviceRestore() { CheckGPUFeatures(); BuildReportingInfo(); - UpdateCmdInfo(); framebufferManagerVulkan_->DeviceRestore(vulkan_, draw_); vulkan2D_.DeviceRestore(vulkan_); diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index eabd8a182..06aa2ef80 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -292,9 +292,6 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip if (vs->HasLights()) { vulkanPipeline->uniformBlocks |= UB_VS_LIGHTS; } - if (vs->HasBones()) { - vulkanPipeline->uniformBlocks |= UB_VS_BONES; - } } return vulkanPipeline; } diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index 846303378..846f9d84d 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -56,8 +56,7 @@ struct VulkanPipelineKey { enum { UB_VS_FS_BASE = (1 << 0), - UB_VS_BONES = (1 << 1), - UB_VS_LIGHTS = (1 << 2), + UB_VS_LIGHTS = (1 << 1), }; // Simply wraps a Vulkan pipeline, providing some metadata. 
diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index bc6e467b7..4131a2404 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -158,11 +158,9 @@ ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan) uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment; memset(&ub_base, 0, sizeof(ub_base)); memset(&ub_lights, 0, sizeof(ub_lights)); - memset(&ub_bones, 0, sizeof(ub_bones)); ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base)); ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights)); - ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones)); } ShaderManagerVulkan::~ShaderManagerVulkan() { @@ -215,8 +213,6 @@ uint64_t ShaderManagerVulkan::UpdateUniforms() { BaseUpdateUniforms(&ub_base, dirty, false); if (dirty & DIRTY_LIGHT_UNIFORMS) LightUpdateUniforms(&ub_lights, dirty); - if (dirty & DIRTY_BONE_UNIFORMS) - BoneUpdateUniforms(&ub_bones, dirty); } gstate_c.CleanUniforms(); return dirty; diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 0078b5cba..b8593d5d3 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -63,9 +63,6 @@ public: bool Failed() const { return failed_; } bool UseHWTransform() const { return useHWTransform_; } - bool HasBones() const { - return id_.Bit(VS_BIT_ENABLE_BONES); - } bool HasLights() const { return usesLighting_; } @@ -110,7 +107,6 @@ public: // Applies dirty changes and copies the buffer. 
bool IsBaseDirty() { return true; } bool IsLightDirty() { return true; } - bool IsBoneDirty() { return true; } uint32_t PushBaseBuffer(VulkanPushBuffer *dest, VkBuffer *buf) { return dest->PushAligned(&ub_base, sizeof(ub_base), uboAlignment_, buf); @@ -118,10 +114,6 @@ public: uint32_t PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf) { return dest->PushAligned(&ub_lights, sizeof(ub_lights), uboAlignment_, buf); } - // TODO: Only push half the bone buffer if we only have four bones. - uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf) { - return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf); - } private: void Clear(); @@ -140,7 +132,6 @@ private: // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. UB_VS_FS_Base ub_base; UB_VS_Lights ub_lights; - UB_VS_Bones ub_bones; VulkanFragmentShader *lastFShader_; VulkanVertexShader *lastVShader_; diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 8aa8b1719..a2e00e55d 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -54,18 +54,6 @@ static const char *vulkan_glsl_preamble = #define WRITE p+=sprintf -static const char * const boneWeightDecl[9] = { - "#ERROR#", - "layout(location = 3) in float w1;\n", - "layout(location = 3) in vec2 w1;\n", - "layout(location = 3) in vec3 w1;\n", - "layout(location = 3) in vec4 w1;\n", - "layout(location = 3) in vec4 w1;\nlayout(location = 4) in float w2;\n", - "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec2 w2;\n", - "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec3 w2;\n", - "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec4 w2;\n", -}; - enum DoLightComputation { LIGHT_OFF, LIGHT_SHADE, @@ -126,7 +114,6 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE); int ls0 = 
id.Bits(VS_BIT_LS0, 2); int ls1 = id.Bits(VS_BIT_LS1, 2); - bool enableBones = id.Bit(VS_BIT_ENABLE_BONES); bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); @@ -144,8 +131,6 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr); if (enableLighting || doShadeMapping) WRITE(p, "layout (std140, set = 0, binding = 3) uniform lightVars {\n%s} light;\n", ub_vs_lightsStr); - if (enableBones) - WRITE(p, "layout (std140, set = 0, binding = 4) uniform boneVars {\n%s} bone;\n", ub_vs_bonesStr); const char *shading = doFlatShading ? "flat " : ""; @@ -161,13 +146,6 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use } } - int numBoneWeights = 0; - int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); - if (enableBones) { - numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); - WRITE(p, "%s", boneWeightDecl[numBoneWeights]); - } - if (useHWTransform) WRITE(p, "layout (location = %d) in vec3 position;\n", (int)PspAttributeLocation::POSITION); else @@ -333,7 +311,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use } } else { // Step 1: World Transform / Skinning - if (!enableBones) { + if (true) { if (doBezier || doSpline) { WRITE(p, " vec3 _pos[16];\n"); WRITE(p, " vec2 _tex[16];\n"); @@ -439,34 +417,6 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer, bool *use else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } - } else { - static const char *rescale[4] = { "", " * 1.9921875", " * 1.999969482421875", "" }; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; - const char *factor = rescale[boneWeightScale]; - - static const char * const boneWeightAttr[8] = { - "w1.x", "w1.y", "w1.z", "w1.w", - "w2.x", "w2.y", "w2.z", "w2.w", - }; - - WRITE(p, " mat3x4 skinMatrix = w1.x * bone.m[0];\n"); - if 
(numBoneWeights > 1) { - for (int i = 1; i < numBoneWeights; i++) { - WRITE(p, " skinMatrix += %s * bone.m[%i];\n", boneWeightAttr[i], i); - } - } - - WRITE(p, ";\n"); - - // Trying to simplify this results in bugs in LBP... - WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor); - WRITE(p, " vec3 worldpos = vec4(skinnedpos, 1.0) * base.world_mtx;\n"); - - if (hasNormal) { - WRITE(p, " mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); - } else { - WRITE(p, " mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); - } - WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * base.world_mtx);\n"); } WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * base.view_mtx, 1.0);\n"); diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index c42427622..5b3e4a270 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -298,13 +298,6 @@ void GameSettingsScreen::CreateViews() { hwTransform->OnClick.Handle(this, &GameSettingsScreen::OnHardwareTransform); hwTransform->SetDisabledPtr(&g_Config.bSoftwareRendering); - CheckBox *swSkin = graphicsSettings->Add(new CheckBox(&g_Config.bSoftwareSkinning, gr->T("Software Skinning"))); - swSkin->OnClick.Add([=](EventParams &e) { - settingInfo_->Show(gr->T("SoftwareSkinning Tip", "Combine skinned model draws on the CPU, faster in most games"), e.v); - return UI::EVENT_CONTINUE; - }); - swSkin->SetDisabledPtr(&g_Config.bSoftwareRendering); - CheckBox *vtxCache = graphicsSettings->Add(new CheckBox(&g_Config.bVertexCache, gr->T("Vertex Cache"))); vtxCache->OnClick.Add([=](EventParams &e) { settingInfo_->Show(gr->T("VertexCache Tip", "Faster, but may cause temporary flicker"), e.v); diff --git a/ext/native/thin3d/VulkanQueueRunner.h b/ext/native/thin3d/VulkanQueueRunner.h index b52b150fa..807d1e47e 100644 --- a/ext/native/thin3d/VulkanQueueRunner.h +++ 
b/ext/native/thin3d/VulkanQueueRunner.h @@ -41,7 +41,7 @@ struct VkRenderData { VkPipelineLayout pipelineLayout; VkDescriptorSet ds; int numUboOffsets; - uint32_t uboOffsets[3]; + uint32_t uboOffsets[2]; VkBuffer vbuffer; // might need to increase at some point VkDeviceSize voffset; VkBuffer ibuffer; diff --git a/headless/Headless.cpp b/headless/Headless.cpp index 661211d1d..883f60853 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -369,7 +369,6 @@ int main(int argc, const char* argv[]) g_Config.bFrameSkipUnthrottle = false; g_Config.bEnableLogging = fullLog; g_Config.iNumWorkerThreads = 1; - g_Config.bSoftwareSkinning = true; g_Config.bVertexDecoderJit = true; g_Config.bBlockTransferGPU = true; g_Config.iSplineBezierQuality = 2; diff --git a/unittest/TestVertexJit.cpp b/unittest/TestVertexJit.cpp index 40dd640c8..dc964c456 100644 --- a/unittest/TestVertexJit.cpp +++ b/unittest/TestVertexJit.cpp @@ -543,7 +543,6 @@ static bool TestVertexColor565() { static bool TestVertex8Skin() { VertexDecoderTestHarness dec; - g_Config.bSoftwareSkinning = true; for (int i = 0; i < 8 * 12; ++i) { gstate.boneMatrix[i] = 0.0f; } @@ -573,7 +572,6 @@ static bool TestVertex8Skin() { static bool TestVertex16Skin() { VertexDecoderTestHarness dec; - g_Config.bSoftwareSkinning = true; for (int i = 0; i < 8 * 12; ++i) { gstate.boneMatrix[i] = 0.0f; } @@ -603,7 +601,6 @@ static bool TestVertex16Skin() { static bool TestVertexFloatSkin() { VertexDecoderTestHarness dec; - g_Config.bSoftwareSkinning = true; for (int i = 0; i < 8 * 12; ++i) { gstate.boneMatrix[i] = 0.0f; }