mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 00:29:57 +00:00
Merge pull request #10454 from unknownbrackets/gpu-minor
Vulkan: Use depth clamping, where available
This commit is contained in:
commit
ea50561c80
@ -508,6 +508,9 @@ float DepthSliceFactor() {
|
||||
if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
|
||||
return DEPTH_SLICE_FACTOR_16BIT;
|
||||
}
|
||||
if (gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
|
||||
return 1.0f;
|
||||
}
|
||||
return DEPTH_SLICE_FACTOR_HIGH;
|
||||
}
|
||||
|
||||
@ -681,6 +684,7 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
// So, we apply the depth range as minz/maxz, and transform for the viewport.
|
||||
float vpZScale = gstate.getViewportZScale();
|
||||
float vpZCenter = gstate.getViewportZCenter();
|
||||
// TODO: This clips the entire draw if minz > maxz.
|
||||
float minz = gstate.getDepthRangeMin();
|
||||
float maxz = gstate.getDepthRangeMax();
|
||||
|
||||
|
@ -182,7 +182,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
||||
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
|
||||
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
|
||||
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
|
||||
{ GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE },
|
||||
{ GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE },
|
||||
|
||||
// Z clip
|
||||
{ GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
|
||||
@ -1739,7 +1739,6 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
|
||||
const int end = 12 * 8 - (op & 0x7F);
|
||||
int i = 0;
|
||||
|
||||
// TODO: Validate what should happen when explicitly setting num to 96 or higher.
|
||||
bool fastLoad = !debugRecording_ && end > 0;
|
||||
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
|
||||
fastLoad = false;
|
||||
|
118
GPU/GPUState.cpp
118
GPU/GPUState.cpp
@ -37,6 +37,9 @@ alignas(16) GPUgstate gstate;
|
||||
// Let's align this one too for good measure.
|
||||
alignas(16) GPUStateCache gstate_c;
|
||||
|
||||
// For save state compatibility.
|
||||
static int savedContextVersion = 1;
|
||||
|
||||
struct CmdRange {
|
||||
u8 start;
|
||||
u8 end;
|
||||
@ -77,6 +80,25 @@ static const CmdRange contextCmdRanges[] = {
|
||||
// Skip: {0xFA, 0xFF},
|
||||
};
|
||||
|
||||
// Writes one matrix into a saved-context command stream.
//
// Emits a single word holding numcmd in its top byte (low 24 bits zero),
// followed by sz words that each combine datacmd in the top byte with
// toFloat24() of the corresponding matrix element.
//
// cmds:    write position in the destination buffer (must have room for sz + 1 words).
// mtx:     source matrix elements, sz of them.
// numcmd:  command id for the leading "matrix number" word.
// datacmd: command id for each data word.
// Returns the write position just past the last word emitted.
static u32_le *SaveMatrix(u32_le *cmds, const float *mtx, int sz, int numcmd, int datacmd) {
	u32_le *out = cmds;

	// Leading word: the matrix-number command with a zero payload.
	*out = numcmd << 24;
	++out;

	int idx = 0;
	while (idx < sz) {
		*out = (datacmd << 24) | toFloat24(mtx[idx]);
		++out;
		++idx;
	}

	return out;
}
|
||||
|
||||
// Reads one matrix back from a saved-context command stream.
//
// The first word (the reset / matrix-number command) is skipped without being
// inspected; each of the following sz words is converted with getFloat24()
// and stored into mtx in order.
//
// cmds: read position in the source buffer (must hold at least sz + 1 words).
// mtx:  destination for sz matrix elements.
// Returns the read position just past the last word consumed.
static const u32_le *LoadMatrix(const u32_le *cmds, float *mtx, int sz) {
	// Step over the leading reset word; its value is not needed.
	const u32_le *in = cmds + 1;

	int idx = 0;
	while (idx < sz) {
		mtx[idx] = getFloat24(*in);
		++in;
		++idx;
	}

	return in;
}
|
||||
|
||||
void GPUgstate::Reset() {
|
||||
memset(gstate.cmdmem, 0, sizeof(gstate.cmdmem));
|
||||
for (int i = 0; i < 256; i++) {
|
||||
@ -89,6 +111,8 @@ void GPUgstate::Reset() {
|
||||
memset(gstate.projMatrix, 0, sizeof(gstate.projMatrix));
|
||||
memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
|
||||
memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));
|
||||
|
||||
savedContextVersion = 1;
|
||||
}
|
||||
|
||||
void GPUgstate::Save(u32_le *ptr) {
|
||||
@ -105,22 +129,37 @@ void GPUgstate::Save(u32_le *ptr) {
|
||||
}
|
||||
}
|
||||
|
||||
if (Memory::IsValidAddress(getClutAddress()))
|
||||
*cmds++ = loadclut;
|
||||
if (savedContextVersion == 0) {
|
||||
if (Memory::IsValidAddress(getClutAddress()))
|
||||
*cmds++ = loadclut;
|
||||
|
||||
// Seems like it actually writes commands to load the matrices and then reset the counts.
|
||||
*cmds++ = boneMatrixNumber;
|
||||
*cmds++ = worldmtxnum;
|
||||
*cmds++ = viewmtxnum;
|
||||
*cmds++ = projmtxnum;
|
||||
*cmds++ = texmtxnum;
|
||||
// Seems like it actually writes commands to load the matrices and then reset the counts.
|
||||
*cmds++ = boneMatrixNumber;
|
||||
*cmds++ = worldmtxnum;
|
||||
*cmds++ = viewmtxnum;
|
||||
*cmds++ = projmtxnum;
|
||||
*cmds++ = texmtxnum;
|
||||
|
||||
u8 *matrices = (u8 *)cmds;
|
||||
memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
|
||||
memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
|
||||
memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
|
||||
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
|
||||
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
|
||||
u8 *matrices = (u8 *)cmds;
|
||||
memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
|
||||
memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
|
||||
memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
|
||||
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
|
||||
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
|
||||
} else {
|
||||
cmds = SaveMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
|
||||
cmds = SaveMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
|
||||
cmds = SaveMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
|
||||
cmds = SaveMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
|
||||
cmds = SaveMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);
|
||||
|
||||
*cmds++ = boneMatrixNumber;
|
||||
*cmds++ = worldmtxnum;
|
||||
*cmds++ = viewmtxnum;
|
||||
*cmds++ = projmtxnum;
|
||||
*cmds++ = texmtxnum;
|
||||
*cmds++ = GE_CMD_END << 24;
|
||||
}
|
||||
}
|
||||
|
||||
void GPUgstate::FastLoadBoneMatrix(u32 addr) {
|
||||
@ -165,27 +204,41 @@ void GPUgstate::Restore(u32_le *ptr) {
|
||||
gstate_c.offsetAddr = ptr[7];
|
||||
|
||||
// Command values start 17 ints in.
|
||||
u32_le *cmds = ptr + 17;
|
||||
const u32_le *cmds = ptr + 17;
|
||||
for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
|
||||
for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
|
||||
cmdmem[n] = *cmds++;
|
||||
}
|
||||
}
|
||||
|
||||
if (Memory::IsValidAddress(getClutAddress()))
|
||||
loadclut = *cmds++;
|
||||
boneMatrixNumber = *cmds++;
|
||||
worldmtxnum = *cmds++;
|
||||
viewmtxnum = *cmds++;
|
||||
projmtxnum = *cmds++;
|
||||
texmtxnum = *cmds++;
|
||||
if (savedContextVersion == 0) {
|
||||
if (Memory::IsValidAddress(getClutAddress()))
|
||||
loadclut = *cmds++;
|
||||
boneMatrixNumber = *cmds++;
|
||||
worldmtxnum = *cmds++;
|
||||
viewmtxnum = *cmds++;
|
||||
projmtxnum = *cmds++;
|
||||
texmtxnum = *cmds++;
|
||||
|
||||
u8 *matrices = (u8 *)cmds;
|
||||
memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
|
||||
memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
|
||||
memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
|
||||
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
|
||||
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
|
||||
u8 *matrices = (u8 *)cmds;
|
||||
memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
|
||||
memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
|
||||
memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
|
||||
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
|
||||
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
|
||||
} else {
|
||||
cmds = LoadMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix));
|
||||
cmds = LoadMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix));
|
||||
cmds = LoadMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix));
|
||||
cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
|
||||
cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));
|
||||
|
||||
boneMatrixNumber = *cmds++;
|
||||
worldmtxnum = *cmds++;
|
||||
viewmtxnum = *cmds++;
|
||||
projmtxnum = *cmds++;
|
||||
texmtxnum = *cmds++;
|
||||
}
|
||||
}
|
||||
|
||||
bool vertTypeIsSkinningEnabled(u32 vertType) {
|
||||
@ -217,7 +270,7 @@ void GPUStateCache::Reset() {
|
||||
}
|
||||
|
||||
void GPUStateCache::DoState(PointerWrap &p) {
|
||||
auto s = p.Section("GPUStateCache", 0, 4);
|
||||
auto s = p.Section("GPUStateCache", 0, 5);
|
||||
if (!s) {
|
||||
// Old state, this was not versioned.
|
||||
GPUStateCache_v0 old;
|
||||
@ -231,6 +284,8 @@ void GPUStateCache::DoState(PointerWrap &p) {
|
||||
vertexFullAlpha = old.vertexFullAlpha;
|
||||
skipDrawReason = old.skipDrawReason;
|
||||
uv = old.uv;
|
||||
|
||||
savedContextVersion = 0;
|
||||
} else {
|
||||
p.Do(vertexAddr);
|
||||
p.Do(indexAddr);
|
||||
@ -290,4 +345,9 @@ void GPUStateCache::DoState(PointerWrap &p) {
|
||||
p.Do(curRTHeight);
|
||||
|
||||
// curRTBufferWidth, curRTBufferHeight, and curRTOffsetX don't need to be saved.
|
||||
if (s < 5) {
|
||||
savedContextVersion = 0;
|
||||
} else {
|
||||
p.Do(savedContextVersion);
|
||||
}
|
||||
}
|
||||
|
@ -477,6 +477,7 @@ enum {
|
||||
GPU_SUPPORTS_VERTEX_TEXTURE_FETCH = FLAG_BIT(11),
|
||||
GPU_SUPPORTS_TEXTURE_FLOAT = FLAG_BIT(12),
|
||||
GPU_SUPPORTS_16BIT_FORMATS = FLAG_BIT(13),
|
||||
GPU_SUPPORTS_DEPTH_CLAMP = FLAG_BIT(14),
|
||||
GPU_SUPPORTS_LARGE_VIEWPORTS = FLAG_BIT(16),
|
||||
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
|
||||
GPU_SUPPORTS_VAO = FLAG_BIT(18),
|
||||
|
@ -255,7 +255,7 @@ void ProcessLine(VertexData& v0, VertexData& v1)
|
||||
return;
|
||||
}
|
||||
|
||||
if (mask && (gstate.clipEnable & 0x1)) {
|
||||
if (mask && gstate.isClippingEnabled()) {
|
||||
// discard if any vertex is outside the near clipping plane
|
||||
if (mask & CLIP_NEG_Z_BIT)
|
||||
return;
|
||||
@ -303,7 +303,7 @@ void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2)
|
||||
mask |= CalcClipMask(v1.clippos);
|
||||
mask |= CalcClipMask(v2.clippos);
|
||||
|
||||
if (mask && (gstate.clipEnable & 0x1)) {
|
||||
if (mask && gstate.isClippingEnabled()) {
|
||||
// discard if any vertex is outside the near clipping plane
|
||||
if (mask & CLIP_NEG_Z_BIT)
|
||||
return;
|
||||
|
@ -104,8 +104,8 @@ static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool *
|
||||
float y = coords.y * yScale / coords.w + yCenter;
|
||||
float z = coords.z * zScale / coords.w + zCenter;
|
||||
|
||||
// Is this really right?
|
||||
if (gstate.clipEnable & 0x1) {
|
||||
// This matches hardware tests - depth is clamped when this flag is on.
|
||||
if (gstate.isClippingEnabled()) {
|
||||
if (z < 0.f)
|
||||
z = 0.f;
|
||||
if (z > 65535.f)
|
||||
|
@ -456,8 +456,20 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
|
||||
}
|
||||
|
||||
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
|
||||
const double scale = DepthSliceFactor() * 65535.0;
|
||||
|
||||
WRITE(p, " highp float z = gl_FragCoord.z;\n");
|
||||
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
|
||||
if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
|
||||
// We center the depth with an offset, but only its fraction matters.
|
||||
// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
|
||||
if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
|
||||
WRITE(p, " z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
|
||||
} else {
|
||||
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
|
||||
}
|
||||
} else {
|
||||
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
|
||||
}
|
||||
WRITE(p, " gl_FragDepth = z;\n");
|
||||
}
|
||||
|
||||
|
@ -209,6 +209,9 @@ void GPU_Vulkan::CheckGPUFeatures() {
|
||||
if (vulkan_->GetFeaturesEnabled().wideLines) {
|
||||
features |= GPU_SUPPORTS_WIDE_LINES;
|
||||
}
|
||||
if (vulkan_->GetFeaturesEnabled().depthClamp) {
|
||||
features |= GPU_SUPPORTS_DEPTH_CLAMP;
|
||||
}
|
||||
if (vulkan_->GetFeaturesEnabled().dualSrcBlend) {
|
||||
switch (vulkan_->GetPhysicalDeviceProperties().vendorID) {
|
||||
case VULKAN_VENDOR_NVIDIA:
|
||||
|
@ -188,7 +188,7 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip
|
||||
rs.lineWidth = lineWidth;
|
||||
rs.rasterizerDiscardEnable = false;
|
||||
rs.polygonMode = VK_POLYGON_MODE_FILL;
|
||||
rs.depthClampEnable = false;
|
||||
rs.depthClampEnable = key.depthClampEnable;
|
||||
|
||||
VkPipelineMultisampleStateCreateInfo ms = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };
|
||||
ms.pSampleMask = nullptr;
|
||||
|
@ -241,10 +241,13 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
|
||||
if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
|
||||
if (gstate.isModeClear()) {
|
||||
key.cullMode = VK_CULL_MODE_NONE;
|
||||
// TODO: Or does it always clamp?
|
||||
key.depthClampEnable = false;
|
||||
} else {
|
||||
// Set cull
|
||||
bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
|
||||
key.cullMode = wantCull ? (gstate.getCullMode() ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_BACK_BIT) : VK_CULL_MODE_NONE;
|
||||
key.depthClampEnable = gstate.isClippingEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ struct VulkanDynamicState {
|
||||
// Let's pack this tight using bitfields.
|
||||
// If an enable flag is set to 0, all the data fields for that section should
|
||||
// also be set to 0.
|
||||
// ~54 bits.
|
||||
// ~64 bits.
|
||||
// Can't use enums unfortunately, they end up signed and breaking values above half their ranges.
|
||||
struct VulkanPipelineRasterStateKey {
|
||||
// Blend
|
||||
@ -37,6 +37,7 @@ struct VulkanPipelineRasterStateKey {
|
||||
unsigned int colorWriteMask : 4;
|
||||
|
||||
// Depth/Stencil
|
||||
unsigned int depthClampEnable : 1;
|
||||
unsigned int depthTestEnable : 1;
|
||||
unsigned int depthWriteEnable : 1;
|
||||
unsigned int depthCompareOp : 3; // VkCompareOp
|
||||
@ -57,4 +58,4 @@ struct VulkanPipelineRasterStateKey {
|
||||
size_t size = sizeof(VulkanPipelineRasterStateKey);
|
||||
return memcmp(this, &other, size) < 0;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
|
||||
g_Config.bVertexDecoderJit = true;
|
||||
g_Config.bBlockTransferGPU = true;
|
||||
g_Config.iSplineBezierQuality = 2;
|
||||
g_Config.bHighQualityDepth = true;
|
||||
|
||||
#ifdef _WIN32
|
||||
InitSysDirectories();
|
||||
|
Loading…
Reference in New Issue
Block a user