Only dirty the uniform UVSCALEOFFSET when really needed

Broken out from #17479

With OpenGL, this greatly reduces the number of glUniform4fv calls in many games
(with similar savings in the other backends).
This commit is contained in:
Henrik Rydgård 2023-05-25 14:46:33 +02:00
parent 82934b9212
commit 80e47b7bd3
8 changed files with 28 additions and 23 deletions

View File

@ -236,7 +236,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
// Code checks this reg directly, not just the vtype ID.
if (!prevThrough) {
gstate.vertType |= GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
}
int bytesRead;
@ -246,7 +246,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
if (!prevThrough) {
gstate.vertType &= ~GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
}
}

View File

@ -377,7 +377,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);
// All framebuffers are array textures in Vulkan now.
if (gstate_c.arrayTexture && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
if (gstate_c.textureIsArray && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
id.SetBit(FS_BIT_SAMPLE_ARRAY_TEXTURE);
}

View File

@ -406,6 +406,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
Unbind();
gstate_c.SetTextureIs3D(false);
gstate_c.SetTextureIsArray(false);
gstate_c.SetTextureIsFramebuffer(false);
return nullptr;
}
@ -573,6 +574,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
gstate_c.SetTextureIsArray(false);
gstate_c.SetTextureIsBGRA((entry->status & TexCacheEntry::STATUS_BGRA) != 0);
gstate_c.SetTextureIsFramebuffer(false);
if (rehash) {
// Update in case any of these changed.
@ -681,6 +683,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
gstate_c.curTextureHeight = h;
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
gstate_c.SetTextureIsArray(false); // Ordinary 2D textures still aren't used by array view in VK. We probably might as well, though, at this point..
gstate_c.SetTextureIsFramebuffer(false);
failedTexture_ = false;
nextTexture_ = entry;
@ -1154,6 +1157,7 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
gstate_c.SetTextureIsBGRA(false);
gstate_c.SetTextureIsFramebuffer(true);
gstate_c.curTextureXOffset = fbInfo.xOffset;
gstate_c.curTextureYOffset = fbInfo.yOffset;
u32 texW = (u32)gstate.getTextureWidth(0);

View File

@ -1176,7 +1176,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col];
// All color formats decode to DEC_U8_4 currently.
// All color formats decode to DEC_U8_4.
// They can become floats later during transform though.
decFmt.c0fmt = DEC_U8_4;
decFmt.c0off = decOff;

View File

@ -336,7 +336,7 @@ public:
u32 VertexType() const { return fmt_; }
const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
const DecVtxFormat &GetDecVtxFmt() const { return decFmt; }
void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;

View File

@ -149,7 +149,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
{ GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommonHW::Execute_TexLevel },
{ GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE | DIRTY_UVSCALEOFFSET },
{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
{ GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
@ -827,13 +827,14 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) {
}
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
if (diff)
if (diff) {
// TODO: We only need to dirty vshader-state here if the output format will be different.
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// Switching between through and non-through, we need to invalidate a bunch of stuff.
if (diff & GE_VTYPE_THROUGH_MASK)
if (diff & GE_VTYPE_THROUGH_MASK) {
// Switching between through and non-through, we need to invalidate a bunch of stuff.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
}
}
@ -844,8 +845,6 @@ void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
@ -1125,8 +1124,6 @@ bail:
void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
gstate_c.framebufFormat = gstate.FrameBufFormat();
// This also make skipping drawing very effective.
@ -1198,8 +1195,6 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
gstate_c.framebufFormat = gstate.FrameBufFormat();
// This also make skipping drawing very effective.
@ -1289,7 +1284,6 @@ void GPUCommonHW::Execute_TexSize0(u32 op, u32 diff) {
if (diff || gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS)) {
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// We will need to reset the texture now.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}

View File

@ -569,8 +569,8 @@ struct GPUStateCache {
}
}
void SetTextureIsArray(bool isArrayTexture) { // VK only
if (arrayTexture != isArrayTexture) {
arrayTexture = isArrayTexture;
if (textureIsArray != isArrayTexture) {
textureIsArray = isArrayTexture;
Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
}
@ -580,6 +580,12 @@ struct GPUStateCache {
Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
}
// Records whether the currently bound texture is a framebuffer.
// DIRTY_UVSCALEOFFSET is raised only when the flag actually changes, so
// repeated calls with the same value leave the dirty flags untouched.
// NOTE(review): presumably framebuffer textures need different UV scale/offset
// values than regular textures - confirm against the uniform update code.
void SetTextureIsFramebuffer(bool isFramebuffer) {
if (textureIsFramebuffer != isFramebuffer) {
textureIsFramebuffer = isFramebuffer;
Dirty(DIRTY_UVSCALEOFFSET);
}
}
void SetUseFlags(u32 newFlags) {
if (newFlags != useFlags_) {
if (useFlags_ != 0)
@ -614,7 +620,8 @@ public:
bool bgraTexture;
bool needShaderTexClamp;
bool arrayTexture;
bool textureIsArray;
bool textureIsFramebuffer;
bool useFlagsChanged;
float morphWeights[8];

View File

@ -775,7 +775,7 @@ void DrawEngineVulkan::DoFlush() {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}
@ -910,7 +910,7 @@ void DrawEngineVulkan::DoFlush() {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}