ppsspp/GPU/Vulkan/DrawEngineVulkan.cpp

639 lines
27 KiB
C++

// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "ppsspp_config.h"
#include <algorithm>
#include <functional>
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Profiler/Profiler.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/Log.h"
#include "Common/MemoryUtil.h"
#include "Common/TimeUtil.h"
#include "Core/MemMap.h"
#include "Core/System.h"
#include "Core/Config.h"
#include "Core/CoreTiming.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"
#include "GPU/Common/SplineCommon.h"
#include "GPU/Common/TransformCommon.h"
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/SoftwareTransformCommon.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/Vulkan/TextureCacheVulkan.h"
#include "GPU/Vulkan/ShaderManagerVulkan.h"
#include "GPU/Vulkan/PipelineManagerVulkan.h"
#include "GPU/Vulkan/FramebufferManagerVulkan.h"
#include "GPU/Vulkan/GPU_Vulkan.h"
using namespace PPSSPP_VK;
enum {
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex)
};
DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw)
: draw_(draw) {
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;
}
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
BindingType bindingTypes[VKRPipelineLayout::MAX_DESC_SET_BINDINGS] = {
BindingType::COMBINED_IMAGE_SAMPLER, // main
BindingType::COMBINED_IMAGE_SAMPLER, // framebuffer-read
BindingType::COMBINED_IMAGE_SAMPLER, // palette
BindingType::UNIFORM_BUFFER_DYNAMIC_ALL, // uniforms
BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX, // lights
BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX, // bones
BindingType::STORAGE_BUFFER_VERTEX, // tess
BindingType::STORAGE_BUFFER_VERTEX,
BindingType::STORAGE_BUFFER_VERTEX,
};
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VkDevice device = vulkan->GetDevice();
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
pipelineLayout_ = renderManager->CreatePipelineLayout(bindingTypes, ARRAY_SIZE(bindingTypes), draw_->GetDeviceCaps().geometryShaderSupported, "drawengine_layout");
pushUBO_ = (VulkanPushPool *)draw_->GetNativeObject(Draw::NativeObject::PUSH_POOL);
pushVertex_ = new VulkanPushPool(vulkan, "pushVertex", 4 * 1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
pushIndex_ = new VulkanPushPool(vulkan, "pushIndex", 1 * 512 * 1024, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
VkSamplerCreateInfo samp{ VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.magFilter = VK_FILTER_LINEAR;
samp.minFilter = VK_FILTER_LINEAR;
samp.maxLod = VK_LOD_CLAMP_NONE; // recommended by best practices, has no effect since we don't use mipmaps.
VkResult res = vkCreateSampler(device, &samp, nullptr, &samplerSecondaryLinear_);
samp.magFilter = VK_FILTER_NEAREST;
samp.minFilter = VK_FILTER_NEAREST;
res = vkCreateSampler(device, &samp, nullptr, &samplerSecondaryNearest_);
_dbg_assert_(VK_SUCCESS == res);
res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
_dbg_assert_(VK_SUCCESS == res);
tessDataTransferVulkan = new TessellationDataTransferVulkan(vulkan);
tessDataTransfer = tessDataTransferVulkan;
draw_->SetInvalidationCallback(std::bind(&DrawEngineVulkan::Invalidate, this, std::placeholders::_1));
}
DrawEngineVulkan::~DrawEngineVulkan() {
DestroyDeviceObjects();
}
void DrawEngineVulkan::DestroyDeviceObjects() {
if (!draw_) {
// We've already done this from LostDevice.
return;
}
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
draw_->SetInvalidationCallback(InvalidationCallback());
delete tessDataTransferVulkan;
tessDataTransfer = nullptr;
tessDataTransferVulkan = nullptr;
pushUBO_ = nullptr;
if (pushVertex_) {
pushVertex_->Destroy();
delete pushVertex_;
pushVertex_ = nullptr;
}
if (pushIndex_) {
pushIndex_->Destroy();
delete pushIndex_;
pushIndex_ = nullptr;
}
if (samplerSecondaryNearest_ != VK_NULL_HANDLE)
vulkan->Delete().QueueDeleteSampler(samplerSecondaryNearest_);
if (samplerSecondaryLinear_ != VK_NULL_HANDLE)
vulkan->Delete().QueueDeleteSampler(samplerSecondaryLinear_);
if (nullSampler_ != VK_NULL_HANDLE)
vulkan->Delete().QueueDeleteSampler(nullSampler_);
renderManager->DestroyPipelineLayout(pipelineLayout_);
}
void DrawEngineVulkan::DeviceLost() {
DestroyDeviceObjects();
DirtyAllUBOs();
draw_ = nullptr;
}
void DrawEngineVulkan::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
InitDeviceObjects();
}
void DrawEngineVulkan::BeginFrame() {
lastPipeline_ = nullptr;
// These will be re-bound if needed, let's not let old bindings linger around too long.
boundDepal_ = VK_NULL_HANDLE;
boundSecondary_ = VK_NULL_HANDLE;
// pushUBO is the thin3d push pool, don't need to BeginFrame again.
pushVertex_->BeginFrame();
pushIndex_->BeginFrame();
tessDataTransferVulkan->SetPushPool(pushUBO_);
DirtyAllUBOs();
AssertEmpty();
}
void DrawEngineVulkan::EndFrame() {
stats_.pushVertexSpaceUsed = (int)pushVertex_->GetUsedThisFrame();
stats_.pushIndexSpaceUsed = (int)pushIndex_->GetUsedThisFrame();
AssertEmpty();
}
void DrawEngineVulkan::DirtyAllUBOs() {
baseUBOOffset = 0;
lightUBOOffset = 0;
boneUBOOffset = 0;
baseBuf = VK_NULL_HANDLE;
lightBuf = VK_NULL_HANDLE;
boneBuf = VK_NULL_HANDLE;
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;
imageView = VK_NULL_HANDLE;
sampler = VK_NULL_HANDLE;
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
void DrawEngineVulkan::Invalidate(InvalidationCallbackFlags flags) {
if (flags & InvalidationCallbackFlags::COMMAND_BUFFER_STATE) {
// Nothing here anymore (removed the "frame descriptor set"
// If we add back "seldomly-changing" descriptors, we might use this again.
}
if (flags & InvalidationCallbackFlags::RENDER_PASS_STATE) {
// If have a new render pass, dirty our dynamic state so it gets re-set.
//
// Dirty everything that has dynamic state that will need re-recording.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
lastPipeline_ = nullptr;
}
}
// The inline wrapper in the header checks for numDrawCalls_ == 0
void DrawEngineVulkan::DoFlush() {
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
renderManager->AssertInRenderPass();
PROFILE_THIS_SCOPE("Flush");
bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
bool textureNeedsApply = false;
if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
// NOTE: After this is set, we MUST call ApplyTexture before returning.
textureNeedsApply = true;
} else if (gstate.getTextureAddress(0) == (gstate.getFrameBufRawAddress() | 0x04000000)) {
// This catches the case of clearing a texture.
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
GEPrimitiveType prim = prevPrim_;
// Always use software for flat shading to fix the provoking index
// if the provoking vertex extension is not available.
bool provokingVertexOk = (tess || gstate.getShadeMode() != GE_SHADE_FLAT);
if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
provokingVertexOk = true;
}
bool useHWTransform = CanUseHardwareTransform(prim) && provokingVertexOk;
uint32_t ibOffset;
uint32_t vbOffset;
// The optimization to avoid indexing isn't really worth it on Vulkan since it means creating more pipelines.
// This could be avoided with the new dynamic state extensions, but not available enough on mobile.
const bool forceIndexed = draw_->GetDeviceCaps().verySlowShaderCompiler;
if (useHWTransform) {
VkBuffer vbuf = VK_NULL_HANDLE;
VkBuffer ibuf = VK_NULL_HANDLE;
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
// If software skinning, we're predecoding into "decoded". So make sure we're done, then push that content.
DecodeVerts(decoded_);
VkDeviceSize size = numDecodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)pushVertex_->Allocate(size, 4, &vbuf, &vbOffset);
memcpy(dest, decoded_, size);
} else {
// Figure out how much pushbuffer space we need to allocate.
int vertsToDecode = ComputeNumVertsToDecode();
// Decode directly into the pushbuffer
u8 *dest = pushVertex_->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vbuf, &vbOffset);
DecodeVerts(dest);
}
int vertexCount;
int maxIndex;
bool useElements;
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
if (textureNeedsApply) {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) {
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
VulkanVertexShader *vshader = nullptr;
VulkanFragmentShader *fshader = nullptr;
VulkanGeometryShader *gshader = nullptr;
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
_dbg_assert_msg_(vshader->UseHWTransform(), "Bad vshader");
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, true, 0, framebufferManager_->GetMSAALevel(), false);
if (!pipeline || !pipeline->pipeline) {
// Already logged, let's bail out.
ResetAfterDraw();
return;
}
BindShaderBlendTex(); // This might cause copies so important to do before BindPipeline.
if (!renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_)) {
renderManager->ReportBadStateForDraw();
ResetAfterDraw();
return;
}
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
gstate_c.Dirty(DIRTY_BLEND_STATE);
}
lastPipeline_ = pipeline;
}
ApplyDrawStateLate(renderManager, false, 0, pipeline->UsesBlendConstant());
gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
gstate_c.Dirty(dirtyRequiresRecheck_);
dirtyRequiresRecheck_ = 0;
lastPipeline_ = pipeline;
}
lastPrim_ = prim;
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
UpdateUBOs();
int descCount = 6;
if (tess)
descCount = 9;
int descSetIndex;
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
descriptors[0].image.view = imageView;
descriptors[0].image.sampler = sampler;
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
descriptors[3].buffer.buffer = baseBuf;
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
descriptors[3].buffer.offset = 0;
descriptors[4].buffer.buffer = lightBuf;
descriptors[4].buffer.range = sizeof(UB_VS_Lights);
descriptors[4].buffer.offset = 0;
descriptors[5].buffer.buffer = boneBuf;
descriptors[5].buffer.range = sizeof(UB_VS_Bones);
descriptors[5].buffer.offset = 0;
if (tess) {
const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo();
for (int j = 0; j < 3; j++) {
descriptors[j + 6].buffer.buffer = bufInfo[j].buffer;
descriptors[j + 6].buffer.range = bufInfo[j].range;
descriptors[j + 6].buffer.offset = bufInfo[j].offset;
}
}
// TODO: Can we avoid binding all three when not needed? Same below for hardware transform.
// Think this will require different descriptor set layouts.
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
};
if (useElements) {
if (!ibuf) {
ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * vertexCount, 4, &ibuf);
}
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1);
} else {
renderManager->Draw(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
}
} else {
PROFILE_THIS_SCOPE("soft");
if (!decOptions_.applySkinInDecode) {
decOptions_.applySkinInDecode = true;
lastVType_ |= (1 << 26);
dec_ = GetVertexDecoder(lastVType_);
}
int prevDecodedVerts = numDecodedVerts_;
DecodeVerts(decoded_);
int vertexCount = DecodeInds();
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
} else {
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
gpuStats.numUncachedVertsDrawn += vertexCount;
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
// At this point, the output is always an index triangle/line/point list, no strips/fans.
u16 *inds = decIndex_;
SoftwareTransformResult result{};
SoftwareTransformParams params{};
params.decoded = decoded_;
params.transformed = transformed_;
params.transformedExpanded = transformedExpanded_;
params.fbman = framebufferManager_;
params.texCache = textureCache_;
// In Vulkan, we have to force drawing of primitives if !framebufferManager_->UseBufferedRendering() because Vulkan clears
// do not respect scissor rects.
params.allowClear = framebufferManager_->UseBufferedRendering();
params.allowSeparateAlphaClear = false;
if (gstate.getShadeMode() == GE_SHADE_FLAT) {
if (!renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
// If we can't have the hardware do it, we need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}
}
params.flippedY = true;
params.usesHalfZ = true;
// We need to update the viewport early because it's checked for flipping in SoftwareTransform.
// We don't have a "DrawStateEarly" in vulkan, so...
// TODO: Probably should eventually refactor this and feed the vp size into SoftwareTransform directly (Unknown's idea).
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
ConvertViewportAndScissor(framebufferManager_->UseBufferedRendering(),
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
}
SoftwareTransform swTransform(params);
const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
const Lin::Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight < 0, trans, scale);
swTransform.Transform(prim, dec_->VertexType(), dec_->GetDecVtxFmt(), numDecodedVerts_, &result);
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_NOT_READY;
if (result.action == SW_NOT_READY) {
// decIndex_ here is always equal to inds currently, but it may not be in the future.
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, RemainingIndices(inds), numDecodedVerts_, VERTEX_BUFFER_MAX, &result);
}
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
// Only here, where we know whether to clear or to draw primitives, should we actually set the current framebuffer! Because that gives use the opportunity
// to use a "pre-clear" render pass, for high efficiency on tilers.
if (result.action == SW_DRAW_INDEXED) {
if (textureNeedsApply) {
gstate_c.pixelMapped = result.pixelMapped;
textureCache_->ApplyTexture();
gstate_c.pixelMapped = false;
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) {
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
VulkanVertexShader *vshader = nullptr;
VulkanFragmentShader *fshader = nullptr;
VulkanGeometryShader *gshader = nullptr;
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0, framebufferManager_->GetMSAALevel(), false);
if (!pipeline || !pipeline->pipeline) {
// Already logged, let's bail out.
ResetAfterDraw();
return;
}
BindShaderBlendTex(); // This might cause copies so super important to do before BindPipeline.
if (!renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_)) {
renderManager->ReportBadStateForDraw();
ResetAfterDraw();
return;
}
if (pipeline != lastPipeline_) {
if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
gstate_c.Dirty(DIRTY_BLEND_STATE);
}
lastPipeline_ = pipeline;
}
ApplyDrawStateLate(renderManager, result.setStencil, result.stencilValue, pipeline->UsesBlendConstant());
gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
gstate_c.Dirty(dirtyRequiresRecheck_);
dirtyRequiresRecheck_ = 0;
lastPipeline_ = pipeline;
}
lastPrim_ = prim;
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
UpdateUBOs();
int descCount = 6;
int descSetIndex;
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
descriptors[0].image.view = imageView;
descriptors[0].image.sampler = sampler;
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
descriptors[3].buffer.buffer = baseBuf;
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
descriptors[4].buffer.buffer = lightBuf;
descriptors[4].buffer.range = sizeof(UB_VS_Lights);
descriptors[5].buffer.buffer = boneBuf;
descriptors[5].buffer.range = sizeof(UB_VS_Bones);
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
};
PROFILE_THIS_SCOPE("renderman_q");
VkBuffer vbuf, ibuf;
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vbuf);
ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf);
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1);
} else if (result.action == SW_CLEAR) {
// Note: we won't get here if the clear is alpha but not color, or color but not alpha.
bool clearColor = gstate.isClearModeColorMask();
bool clearAlpha = gstate.isClearModeAlphaMask(); // and stencil
bool clearDepth = gstate.isClearModeDepthMask();
int mask = 0;
// The Clear detection takes care of doing a regular draw instead if separate masking
// of color and alpha is needed, so we can just treat them as the same.
if (clearColor || clearAlpha) mask |= Draw::FBChannel::FB_COLOR_BIT;
if (clearDepth) mask |= Draw::FBChannel::FB_DEPTH_BIT;
if (clearAlpha) mask |= Draw::FBChannel::FB_STENCIL_BIT;
// Note that since the alpha channel and the stencil channel are shared on the PSP,
// when we clear alpha, we also clear stencil to the same value.
draw_->Clear(mask, result.color, result.depth, result.color >> 24);
if (clearColor || clearAlpha) {
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
}
if (gstate_c.Use(GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) {
int scissorX1 = gstate.getScissorX1();
int scissorY1 = gstate.getScissorY1();
int scissorX2 = gstate.getScissorX2() + 1;
int scissorY2 = gstate.getScissorY2() + 1;
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color);
}
}
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
ResetAfterDrawInline();
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
GPUDebug::NotifyDraw();
}
void DrawEngineVulkan::ResetAfterDraw() {
indexGen.Reset();
numDecodedVerts_ = 0;
numDrawVerts_ = 0;
numDrawInds_ = 0;
vertexCountInDrawCalls_ = 0;
decodeIndsCounter_ = 0;
decodeVertsCounter_ = 0;
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
gstate_c.vertexFullAlpha = true;
}
void DrawEngineVulkan::UpdateUBOs() {
if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) {
baseUBOOffset = shaderManager_->PushBaseBuffer(pushUBO_, &baseBuf);
dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS;
}
if ((dirtyUniforms_ & DIRTY_LIGHT_UNIFORMS) || lightBuf == VK_NULL_HANDLE) {
lightUBOOffset = shaderManager_->PushLightBuffer(pushUBO_, &lightBuf);
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
}
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
boneUBOOffset = shaderManager_->PushBoneBuffer(pushUBO_, &boneBuf);
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
}
}
void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
// SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members
// also need to be 16-byte aligned, hence the padding.
struct TessData {
float pos[3]; float pad1;
float uv[2]; float pad2[2];
float color[4];
};
int size = size_u * size_v;
int ssboAlignment = vulkan_->GetPhysicalDeviceProperties().properties.limits.minStorageBufferOffsetAlignment;
uint8_t *data = (uint8_t *)push_->Allocate(size * sizeof(TessData), ssboAlignment, &bufInfo_[0].buffer, (uint32_t *)&bufInfo_[0].offset);
bufInfo_[0].range = size * sizeof(TessData);
float *pos = (float *)(data);
float *tex = (float *)(data + offsetof(TessData, uv));
float *col = (float *)(data + offsetof(TessData, color));
int stride = sizeof(TessData) / sizeof(float);
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
using Spline::Weight;
// Weights U
data = (uint8_t *)push_->Allocate(weights.size_u * sizeof(Weight), ssboAlignment, &bufInfo_[1].buffer, (uint32_t *)&bufInfo_[1].offset);
memcpy(data, weights.u, weights.size_u * sizeof(Weight));
bufInfo_[1].range = weights.size_u * sizeof(Weight);
// Weights V
data = (uint8_t *)push_->Allocate(weights.size_v * sizeof(Weight), ssboAlignment, &bufInfo_[2].buffer, (uint32_t *)&bufInfo_[2].offset);
memcpy(data, weights.v, weights.size_v * sizeof(Weight));
bufInfo_[2].range = weights.size_v * sizeof(Weight);
}