Fix clearing bug, add FIFO_RELAXED flag, etc

This commit is contained in:
Henrik Rydgard 2016-01-06 23:08:26 +01:00
parent ddb36a35a0
commit 01e3fbae95
13 changed files with 97 additions and 72 deletions

View File

@ -68,6 +68,8 @@ void PSP_BeginHostFrame();
void PSP_EndHostFrame();
void PSP_RunLoopUntil(u64 globalticks);
void PSP_RunLoopFor(int cycles);
void PSP_BeginFrame();
void PSP_EndFrame();
void Audio_Init();

View File

@ -171,10 +171,13 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
void DrawEngineVulkan::BeginFrame() {
FrameData *frame = &frame_[curFrame_ & 1];
vkResetDescriptorPool(vulkan_->GetDevice(), frame->descPool, 0);
frame->pushData->Begin(vulkan_->GetDevice());
frame->pushData->Reset();
}
void DrawEngineVulkan::EndFrame() {
FrameData *frame = &frame_[curFrame_ & 1];
frame->pushData->End(vulkan_->GetDevice());
curFrame_++;
}
@ -471,16 +474,20 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
if (result.action == SW_DRAW_PRIMITIVES) {
if (result.setStencil) {
// dxstate.stencilFunc.set(D3DCMP_ALWAYS, result.stencilValue, 255);
// hey, dynamic state!
vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, result.stencilValue);
}
VkBuffer buf[1] = {};
VkDeviceSize offsets[1] = { 0 };
ibOffset = (uint32_t)frame->pushData->Push(decIndex, 2 * indexGen.VertexCount());
vbOffset = (uint32_t)frame->pushData->Push(decoded, numTrans * dec_->GetDecVtxFmt().stride);
VkBuffer buf[1] = { frame->pushData->GetVkBuffer() };
VkDeviceSize offsets[1] = { vbOffset };
if (drawIndexed) {
// TODO: Have a buffer per frame, use a walking buffer pointer
// TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments
vkCmdBindVertexBuffers(cmd_, 0, 1, buf, offsets);
vkCmdBindIndexBuffer(cmd_, buf[0], 0, VK_INDEX_TYPE_UINT16);
vkCmdBindIndexBuffer(cmd_, buf[0], ibOffset, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmd_, numTrans, 1, 0, 0, 0);
// pD3Ddevice->DrawIndexedPrimitiveUP(glprim[prim], 0, maxIndex, D3DPrimCount(glprim[prim], numTrans), inds, D3DFMT_INDEX16, drawBuffer, sizeof(TransformedVertex));
} else {
@ -497,13 +504,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
if (gstate.isClearModeAlphaMask()) mask |= 2;
if (gstate.isClearModeDepthMask()) mask |= 4;
VkClearValue value;
value.color.float32[0] = (result.color & 0xFF) * (1.0f / 255.0f);
value.color.float32[1] = ((result.color >> 8) & 0xFF) * (1.0f / 255.0f);
value.color.float32[2] = ((result.color >> 16) & 0xFF) * (1.0f / 255.0f);
value.color.float32[3] = ((result.color >> 24) & 0xFF) * (1.0f / 255.0f);
value.depthStencil.depth = result.depth;
value.depthStencil.stencil = (result.color >> 24) & 0xFF;
VkClearValue colorValue, depthValue;
colorValue.color.float32[0] = (result.color & 0xFF) * (1.0f / 255.0f);
colorValue.color.float32[1] = ((result.color >> 8) & 0xFF) * (1.0f / 255.0f);
colorValue.color.float32[2] = ((result.color >> 16) & 0xFF) * (1.0f / 255.0f);
colorValue.color.float32[3] = ((result.color >> 24) & 0xFF) * (1.0f / 255.0f);
depthValue.depthStencil.depth = result.depth;
depthValue.depthStencil.stencil = (result.color >> 24) & 0xFF;
VkClearRect rect;
rect.baseArrayLayer = 0;
@ -517,13 +524,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
VkClearAttachment attach[2];
if (mask & 3) {
attach[count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
attach[count].clearValue = value;
attach[count].clearValue = colorValue;
attach[count].colorAttachment = 0;
count++;
}
if (mask & 4) {
	// Depth and stencil are cleared together through one attachment entry.
	attach[count].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	attach[count].clearValue = depthValue;
	attach[count].colorAttachment = 0;
	// Count this entry. Without the increment the depth/stencil attachment is
	// never passed to vkCmdClearAttachments, and a depth-only clear would hand
	// it a count that was never advanced.
	count++;
}
vkCmdClearAttachments(cmd_, count, attach, 1, &rect);

View File

@ -69,6 +69,23 @@ void FramebufferManagerVulkan::FlushBeforeCopy() {
drawEngine_->Flush(nullptr);
}
// Currently a no-op: the body is empty, so vfb, sync and the x/y/w/h
// rectangle are all unused.
void FramebufferManagerVulkan::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) {
}
// Currently a no-op: the body is empty, so fb_address and loadBytes are
// unused.
void FramebufferManagerVulkan::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
}
// Always returns false without touching nvfb; no temporary buffer is created
// by this implementation.
bool FramebufferManagerVulkan::CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
return false;
}
// Currently a no-op: the body is empty, so nvfb is unused.
void FramebufferManagerVulkan::UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) {
}
// Returns an empty list; this implementation does not collect any
// framebuffer information.
std::vector<FramebufferInfo> FramebufferManagerVulkan::GetFramebufferList() {
	std::vector<FramebufferInfo> list;
	return list;
}

View File

@ -52,9 +52,9 @@ public:
return false;
}
virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override {
throw std::logic_error("The method or operation is not implemented.");
}
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override;
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override;
// Empty override: does nothing with the supplied pixel data, format, stride
// or dimensions.
virtual void MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) override {
}
@ -92,16 +92,6 @@ public:
// Empty override: the notification is accepted and ignored.
virtual void NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) override {
}
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) override {
}
bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override {
return false;
}
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override {
}
void DestroyAllFBOs();
void Resized();
void DeviceLost();
@ -111,6 +101,10 @@ public:
std::vector<FramebufferInfo> GetFramebufferList();
protected:
bool CreateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;
private:
VulkanContext *vulkan_;
VkCommandBuffer cmd_;

View File

@ -578,6 +578,7 @@ void GPU_Vulkan::BeginFrameInternal() {
shaderManager_->DirtyUniform(DIRTY_ALL);
framebufferManager_.BeginFrame();
drawEngine_.BeginFrame();
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
// Draw everything directly to the backbuffer.
@ -627,12 +628,14 @@ void GPU_Vulkan::CopyDisplayToOutput() {
// Wraps up the current frame, in this order: flushes pending draws on
// curCmd_, ends the draw engine's frame, marks the last shader dirty, has the
// framebuffer manager copy the display to the output and end its frame, and
// finally flags the texture state as updated.
void GPU_Vulkan::CopyDisplayToOutputInternal() {
// Flush anything left over.
drawEngine_.Flush(curCmd_);
// End the draw engine's frame only after the final flush above.
drawEngine_.EndFrame();
shaderManager_->DirtyLastShader();
framebufferManager_.CopyDisplayToOutput();
framebufferManager_.EndFrame();
gstate_c.textureChanged = TEXCHANGE_UPDATED;
}

View File

@ -144,8 +144,7 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
}
}
// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY, bool invertedZ) {
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
// Half pixel offset hack
float xoff = 0.5f / gstate_c.curRTRenderWidth;
xoff = gstate_c.vpXOffset + (invertedX ? xoff : -xoff);
@ -157,7 +156,9 @@ static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invert
if (invertedY)
yoff = -yoff;
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, invertedZ ? -0.5 : 0.5f));
const Vec3 trans(xoff, yoff, gstate_c.vpZOffset + 0.5f);
const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
in.translateAndScale(trans, scale);
}
static void ConvertProjMatrixToVulkanThrough(Matrix4x4 &in) {
@ -228,9 +229,8 @@ void ShaderManagerVulkan::VSUpdateUniforms(int dirtyUniforms) {
flippedMatrix[8] = -flippedMatrix[8];
flippedMatrix[12] = -flippedMatrix[12];
}
const bool invertedZ = gstate_c.vpDepthScale < 0;
ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY, invertedZ);
ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
}
@ -357,18 +357,22 @@ void ShaderManagerVulkan::VSUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_DEPTHRANGE) {
float viewZScale = gstate.getViewportZScale();
float viewZCenter = gstate.getViewportZCenter();
// Given the way we do the rounding, the integer part of the offset is probably mostly irrelevant as we cancel
// it afterwards anyway.
// It seems that we should adjust for D3D projection matrix. We got squashed up to only 0-1, so we divide
// the scale factor by 2, and add an offset. But, this doesn't work! I get near-perfect results not doing it.
// viewZScale *= 2.0f;
// Need to take the possibly inverted proj matrix into account.
if (gstate_c.vpDepthScale < 0.0)
viewZScale *= -1.0f;
viewZCenter -= 32767.5f;
float viewZInvScale;
// We had to scale and translate Z to account for our clamped Z range.
// Therefore, we also need to reverse this to round properly.
//
// Example: scale = 65535.0, center = 0.0
// Resulting range = -65535 to 65535, clamped to [0, 65535]
// gstate_c.vpDepthScale = 2.0f
// gstate_c.vpZOffset = -1.0f
//
// The projection already accounts for those, so we need to reverse them.
//
// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
if (viewZScale != 0.0) {
viewZInvScale = 1.0f / viewZScale;
} else {

View File

@ -279,17 +279,13 @@ void ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, int prim, Vulk
float depthMin = vpAndScissor.depthRangeMin;
float depthMax = vpAndScissor.depthRangeMax;
if (!gstate.isModeThrough()) {
// Direct3D can't handle negative depth ranges, so we fix it in the projection matrix.
if (gstate_c.vpDepthScale != depthMax - depthMin) {
gstate_c.vpDepthScale = depthMax - depthMin;
vpAndScissor.dirtyProj = true;
}
if (depthMin > depthMax) {
std::swap(depthMin, depthMax);
}
if (depthMin < 0.0f) depthMin = 0.0f;
if (depthMax > 1.0f) depthMax = 1.0f;
if (depthMin < 0.0f) depthMin = 0.0f;
if (depthMax > 1.0f) depthMax = 1.0f;
if (vpAndScissor.dirtyProj) {
// shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
}
if (vpAndScissor.dirtyDepth) {
// shaderManager_->DirtyUniform(DIRTY_DEPTHRANGE);
}
}

View File

@ -89,6 +89,10 @@ TextureCacheVulkan::~TextureCacheVulkan() {
Clear(true);
}
// Currently a no-op: the body is empty, so clutAddr and bytes are unused.
void TextureCacheVulkan::DownloadFramebufferForClut(u32 clutAddr, u32 bytes) {
}
static u32 EstimateTexMemoryUsage(const TextureCacheVulkan::TexCacheEntry *entry) {
const u16 dim = entry->dim;
const u8 dimW = ((dim >> 0) & 0xf);

View File

@ -72,13 +72,8 @@ public:
void ApplyTexture(VkImageView &imageView, VkSampler &sampler);
// Always returns false and writes nothing to dest; state is ignored.
bool DecodeTexture(u8 *dest, const GPUgstate &state) {
return false;
}
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes) override {
}
protected:
void DownloadFramebufferForClut(u32 clutAddr, u32 bytes);
private:
void Decimate(); // Run this once per frame to get rid of old textures.

View File

@ -908,6 +908,7 @@ void EmuScreen::render() {
while (coreState == CORE_RUNNING) {
PSP_RunLoopFor(blockTicks);
}
// Hopefully coreState is now CORE_NEXTFRAME
if (coreState == CORE_NEXTFRAME) {
// set back to running for the next frame

View File

@ -703,6 +703,7 @@ void NativeRender(GraphicsContext *graphicsContext) {
ortho.setOrthoD3D(0.0f, xres, 0, yres, -1.0f, 1.0f);
break;
case GPUBackend::DIRECT3D9:
case GPUBackend::DIRECT3D11:
ortho.setOrthoD3D(0.0f, xres, yres, 0.0f, -1.0f, 1.0f);
Matrix4x4 translation;
translation.setTranslation(Vec3(-0.5f, -0.5f, 0.0f));

View File

@ -194,10 +194,8 @@ VkCommandBuffer VulkanContext::BeginSurfaceRenderPass(VkClearValue clear_values[
// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
// Now, I wonder if we should do this early in the frame or late? Right now we do it early, which should be fine.
VkResult res = fpAcquireNextImageKHR(device_, swap_chain_,
UINT64_MAX,
acquireSemaphore,
NULL,
&current_buffer);
UINT64_MAX, acquireSemaphore, NULL, &current_buffer);
// TODO: Deal with the VK_SUBOPTIMAL_KHR and VK_ERROR_OUT_OF_DATE_KHR
// return codes
assert(res == VK_SUCCESS);
@ -917,28 +915,30 @@ void VulkanContext::InitSwapchain(VkCommandBuffer cmd) {
VkExtent2D swapChainExtent;
// width and height are either both -1, or both not -1.
if (surfCapabilities.currentExtent.width == -1)
{
if (surfCapabilities.currentExtent.width == -1) {
// If the surface size is undefined, the size is set to
// the size of the images requested.
swapChainExtent.width = width;
swapChainExtent.height = height;
} else
{
} else {
// If the surface size is defined, the swap chain size must match
swapChainExtent = surfCapabilities.currentExtent;
}
// If mailbox mode is available, use it, as it is the lowest-latency non-
// tearing mode. If not, try IMMEDIATE which will usually be available,
// and is fastest (though it tears). If not, fall back to FIFO which is
// always available.
// tearing mode. If not, try FIFO_RELAXED, and if not that, try IMMEDIATE
// which will usually be available, and is fastest (though it tears).
// If not, fall back to FIFO which is always available.
VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR;
for (size_t i = 0; i < presentModeCount; i++) {
if ((flags_ & VULKAN_FLAG_PRESENT_MAILBOX) && presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR) {
swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR;
break;
}
if ((flags_ & VULKAN_FLAG_PRESENT_FIFO_RELAXED) && presentModes[i] == VK_PRESENT_MODE_FIFO_RELAXED_KHR) {
swapchainPresentMode = VK_PRESENT_MODE_FIFO_RELAXED_KHR;
break;
}
if ((flags_ & VULKAN_FLAG_PRESENT_IMMEDIATE) && presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) {
swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
break;

View File

@ -57,6 +57,7 @@ enum {
VULKAN_FLAG_VALIDATE = 1,
VULKAN_FLAG_PRESENT_MAILBOX = 2,
VULKAN_FLAG_PRESENT_IMMEDIATE = 4,
VULKAN_FLAG_PRESENT_FIFO_RELAXED = 8,
};
// A layer can expose extensions, keep track of those extensions here.