Compare commits

...

13 Commits

Author SHA1 Message Date
refractionpcsx2
8cdec6fb5a GS: Implement draw buffering 2026-01-29 12:49:18 +00:00
TJnotJT
a1173c53d3 GS/HW: Reduce barriers in triangle strips/fans.
Account for triangles forming non-overlapping quads in triangles strips/fans.
2026-01-26 11:57:20 +01:00
JordanTheToaster
ac0deff9b2 Memcard: Purge automatic save management 2026-01-26 03:35:44 +01:00
refractionpcsx2
3ae707464c GS/HW: Ignore point draws for a single black pixel in the top left 2026-01-26 03:19:49 +01:00
refractionpcsx2
00ef419023 GS/TC: Enhance depth clear behaviour to improve heuristics 2026-01-26 03:19:49 +01:00
refractionpcsx2
448a279cd4 GS/HW: Further improve clear behaviour 2026-01-26 03:19:49 +01:00
refractionpcsx2
70e13adfde GS/TC: Improve wrapping behaviour for inside target lookups 2026-01-26 03:19:49 +01:00
refractionpcsx2
25bc280818 GS/HW: Clear GS memory if not zero or target overwrite 2026-01-26 03:19:49 +01:00
TheLastRar
32e073002a [ci skip] CI/Linux: Fix contribute link 2026-01-25 19:55:48 +01:00
PCSX2 Bot
911314e948 [ci skip] Qt: Update Base Translation. 2026-01-25 02:38:54 +01:00
lightningterror
a73fcb343c GS: Default to DX12 on NV/AMD.
DX12 trades blows with Vulkan on AMD depending on cpu usage and will be stable on RDNA 3 so let's default to it.

NVIDIA: 590 drivers on Nvidia are bad causing performance regressions so let's switch to DX12 as the default.
2026-01-24 20:28:04 +01:00
TheLastRar
251b2960f8 Revert "GS/VK: Use the compute queues for present" 2026-01-24 20:27:30 +01:00
refractionpcsx2
5bb99105c3 GS/HW: Clear downscale source on draw to avoid cross game corruption 2026-01-24 18:32:43 +01:00
21 changed files with 1404 additions and 796 deletions

View File

@@ -20,7 +20,7 @@
<url type="donation">https://github.com/sponsors/PCSX2</url>
<url type="faq">https://pcsx2.net/docs/</url>
<url type="help">https://pcsx2.net/discord</url>
<url type="contribute">https://github.com/PCSX2/pcsx2/blob/master/.github/CONTRIBUTING.md</url>
<url type="contribute">https://pcsx2.net/docs/category/contributing</url>
<url type="translate">https://crowdin.com/project/pcsx2-emulator</url>
<url type="contact">https://mastodon.social/@PCSX2</url>
<screenshots>

View File

@@ -40,7 +40,6 @@ MemoryCardSettingsWidget::MemoryCardSettingsWidget(SettingsWindow* settings_dial
SettingWidgetBinder::BindWidgetToFolderSetting(sif, m_ui.directory, m_ui.browse, m_ui.open, m_ui.reset, "Folders",
"MemoryCards", Path::Combine(EmuFolders::DataRoot, "memcards"));
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.automaticManagement, "EmuCore", "McdFolderAutoManage", true);
setupAdditionalUi();
@@ -58,11 +57,6 @@ MemoryCardSettingsWidget::MemoryCardSettingsWidget(SettingsWindow* settings_dial
connect(m_ui.deleteCard, &QPushButton::clicked, this, &MemoryCardSettingsWidget::deleteCard);
refresh();
dialog()->registerWidgetHelp(m_ui.automaticManagement, tr("Automatically manage saves based on running game"),
tr("Checked"),
tr("(Folder type only / Card size: Auto) Loads only the relevant booted game saves, ignoring others. Avoids "
"running out of space for saves."));
}
MemoryCardSettingsWidget::~MemoryCardSettingsWidget() = default;

View File

@@ -9,6 +9,7 @@
#include <QtWidgets/QTreeWidget>
#include <QtWidgets/QToolButton>
#include <QtWidgets/QListWidget>
#include <QtWidgets/QCheckBox>
#include <array>
#include <optional>

View File

@@ -175,22 +175,6 @@
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="settingsGroupBox">
<property name="title">
<string>Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="0">
<widget class="QCheckBox" name="automaticManagement">
<property name="text">
<string>Automatically manage saves based on running game</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<customwidgets>
@@ -211,7 +195,6 @@
<tabstop>renameCard</tabstop>
<tabstop>convertCard</tabstop>
<tabstop>deleteCard</tabstop>
<tabstop>automaticManagement</tabstop>
</tabstops>
<resources/>
<connections/>

View File

@@ -11888,12 +11888,12 @@ This action cannot be undone.</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2788"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2713"/>
<source>Stencil buffers and texture barriers are both unavailable, this will break some graphical effects.</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="5130"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="5055"/>
<source>Spin GPU During Readbacks is enabled, but calibrated timestamps are unavailable. This might be really slow.</source>
<translation type="unfinished"></translation>
</message>
@@ -11995,7 +11995,7 @@ Please see our official documentation for more information.</source>
<context>
<name>GSDeviceVK</name>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2153"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2078"/>
<source>Your GPU does not support the required Vulkan features.</source>
<translation type="unfinished"></translation>
</message>

View File

@@ -1326,7 +1326,6 @@ struct Pcsx2Config
UseSavestateSelector : 1,
InhibitScreensaver : 1,
BackupSavestate : 1,
McdFolderAutoManage : 1,
ManuallySetRealTimeClock : 1, // passes user-set real-time clock information to cdvd at startup
UseSystemLocaleFormat : 1, // presents OS time format instead of yyyy-MM-dd HH:mm:ss for manual RTC

File diff suppressed because it is too large Load Diff

View File

@@ -127,37 +127,41 @@ private:
protected:
static constexpr int INVALID_ALPHA_MINMAX = 500;
static constexpr int MAX_DRAW_BUFFERS = 3;
GSVertex m_v = {};
float m_q = 1.0f;
GSVector4i m_scissor_cull_min = {};
GSVector4i m_scissor_cull_max = {};
GSVector4i m_xyof = {};
u32 m_used_buffers_idx = 0;
u32 m_current_buffer_idx = 0;
bool m_recent_buffer_switch = false;
struct
struct GSVertexBuff
{
GSVertex* buff;
GSVertex* buff_copy; // same size buffer to copy/modify the original buffer
GSVertex* buff_copy; // same size buffer to copy/modify the original buffer
u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
u32 xy_tail;
GSVector4i xy[4];
GSVector4i xyhead;
} m_vertex = {};
};
struct
GSVertexBuff m_vertex_buffers[MAX_DRAW_BUFFERS];
GSVertexBuff* m_vertex;
struct GSIndexBuff
{
u16* buff;
u32 tail;
} m_index = {};
};
struct
{
GSVertex* buff;
u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
u32 xy_tail;
GSVector4i xy[4];
GSVector4i xyhead;
} m_draw_vertex = {};
GSIndexBuff m_index_buffers[MAX_DRAW_BUFFERS];
GSIndexBuff* m_index;
GSVertexBuff m_draw_vertex = {};
struct
{
@@ -165,6 +169,16 @@ protected:
u32 tail;
} m_draw_index = {};
struct GSDrawBufferEnv
{
GSDrawingEnvironment m_env;
int m_backed_up_ctx = 0;
u32 m_dirty_regs = 0;
GSVector4i draw_rect = GSVector4i::zero();
};
GSDrawBufferEnv m_env_buffers[MAX_DRAW_BUFFERS] = {};
void UpdateContext();
void UpdateScissor();
@@ -175,6 +189,7 @@ protected:
template<u32 prim> void HandleAutoFlush();
bool EarlyDetectShuffle(u32 prim);
void CheckCLUTValidity(u32 prim);
bool CheckOverlapVerts(u32 n);
template <u32 prim, bool auto_flush> void VertexKick(u32 skip);
@@ -241,9 +256,10 @@ public:
GSLocalMemory m_mem;
GSDrawingEnvironment m_env = {};
GSDrawingEnvironment m_prev_env = {};
GSDrawingEnvironment m_temp_env = {};
const GSDrawingEnvironment* m_draw_env = &m_env;
GSDrawingContext* m_context = nullptr;
GSVector4i temp_draw_rect = {};
GSVector4i temp_draw_rect;
std::unique_ptr<GSDumpBase> m_dump;
bool m_scissor_invalid = false;
bool m_quad_check_valid = false;
@@ -441,7 +457,15 @@ public:
virtual void Reset(bool hardware_reset);
virtual void UpdateSettings(const Pcsx2Config::GSOptions& old_config);
void ResetDrawBuffers();
void ResetDrawBufferIdx();
void FlushBuffers(bool use_flush_reason = false, GSFlushReason flush_reason = GSFlushReason::CONTEXTCHANGE);
void PushBuffer();
void SetDrawBufferEnv();
void SetDrawBuffDirty();
bool CanBufferNewDraw();
void Flush(GSFlushReason reason);
void FlushDraw(GSFlushReason reason);
u32 CalcMask(int exp, int max_exp);
void FlushPrim();
bool TestDrawChanged();
@@ -454,7 +478,7 @@ public:
virtual void Move();
GSVector4i GetTEX0Rect();
GSVector4i GetTEX0Rect(GSDrawingContext prev_ctx);
void CheckWriteOverlap(bool req_write, bool req_read);
void Write(const u8* mem, int len);
void Read(u8* mem, int len);

View File

@@ -426,7 +426,8 @@ GSRendererType D3D::GetPreferredRenderer()
if (!feature_level.has_value())
return GSRendererType::DX11;
else if (feature_level == D3D_FEATURE_LEVEL_12_0)
return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::OGL;
//return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::OGL;
return GSRendererType::DX12;
else if (feature_level == D3D_FEATURE_LEVEL_11_0)
return GSRendererType::OGL;
else
@@ -439,7 +440,8 @@ GSRendererType D3D::GetPreferredRenderer()
if (!feature_level.has_value())
return GSRendererType::DX11;
else if (feature_level == D3D_FEATURE_LEVEL_12_0)
return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::DX12;
//return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::DX12;
return GSRendererType::DX12;
else if (feature_level == D3D_FEATURE_LEVEL_11_1)
return GSRendererType::DX12;
else

View File

@@ -115,7 +115,7 @@ bool GSHwHack::GSC_IRem(GSRendererHW& r, int& skip)
// Detect the deswizzling shuffle from depth, copying the RG and BA separately on each half of the page (ignore the split).
if (RTME && RFBP != RTBP0 && RFPSM == PSMCT16S && RTPSM == PSMCT16S)
{
if (r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 64 && r.m_index.tail == 128)
if (r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 64 && r.m_index->tail == 128)
{
const GSVector4i draw_size(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y/2, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y/2);
const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y/2, r.m_vt.m_max.t.x, r.m_vt.m_max.t.y/2);
@@ -126,7 +126,7 @@ bool GSHwHack::GSC_IRem(GSRendererHW& r, int& skip)
}
// Following the previous draw, it tries to copy everything read from depth and offset it by 2, for the alternate line channel shuffle (skipped above).
if (RTBP0 == (RFBP - 0x20) && r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 34 && r.m_index.tail == 2)
if (RTBP0 == (RFBP - 0x20) && r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 34 && r.m_index->tail == 2)
{
GSVector4i draw_size(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y - 2.0f, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y - 2.0f);
GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x, r.m_vt.m_max.t.y);
@@ -264,10 +264,10 @@ bool GSHwHack::GSC_SFEX3(GSRendererHW& r, int& skip)
// Skipping is no good as the copy is used again later, and it causes a weird shimmer/echo effect every other frame.
// Add on the height from the second part of the draw to the first, to make it one big rect.
r.m_vertex.buff[1].XYZ.Y += r.m_vertex.buff[r.m_vertex.tail - 1].XYZ.Y - r.m_context->XYOFFSET.OFY;
r.m_vertex.buff[1].V = r.m_vertex.buff[r.m_vertex.tail - 1].V;
r.m_vertex.tail = 2;
r.m_index.tail = 2;
r.m_vertex->buff[1].XYZ.Y += r.m_vertex->buff[r.m_vertex->tail - 1].XYZ.Y - r.m_context->XYOFFSET.OFY;
r.m_vertex->buff[1].V = r.m_vertex->buff[r.m_vertex->tail - 1].V;
r.m_vertex->tail = 2;
r.m_index->tail = 2;
}
}
@@ -332,9 +332,9 @@ bool GSHwHack::GSC_NamcoGames(GSRendererHW& r, int& skip)
{
if (skip == 0)
{
if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2)
if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index->tail > 2)
{
GSVertex* v = &r.m_vertex.buff[0];
GSVertex* v = &r.m_vertex->buff[0];
// Don't enable hack on native res.
// Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling.
// Game copies the framebuffer as individual page rects with slight offsets (like 1/16 of a pixel etc) which doesn't wokr well with upscaling.
@@ -348,7 +348,7 @@ bool GSHwHack::GSC_NamcoGames(GSRendererHW& r, int& skip)
else
{
// Fixes the alignment of the two halves for the heat haze on the temple stage.
for (u32 i = 0; i < r.m_index.tail; i+=2)
for (u32 i = 0; i < r.m_index->tail; i+=2)
{
v[i].XYZ.Y -= 0x8;
}
@@ -676,7 +676,7 @@ bool GSHwHack::GSC_NFSUndercover(GSRendererHW& r, int& skip)
if (RPRIM->TME && Frame.PSM == PSMCT16S && Frame.FBMSK != 0 && Frame.FBW == 10 && Texture.TBW == 1 && Texture.TBP0 == 0x02800 && Texture.PSM == PSMZ16S)
{
GSVertex* v = &r.m_vertex.buff[1];
GSVertex* v = &r.m_vertex->buff[1];
v[0].XYZ.X = static_cast<u16>(RCONTEXT->XYOFFSET.OFX + ((r.m_r.z * 2) << 4));
v[0].XYZ.Y = static_cast<u16>(RCONTEXT->XYOFFSET.OFY + (r.m_r.w << 4));
v[0].U = r.m_r.z << 4;
@@ -687,8 +687,8 @@ bool GSHwHack::GSC_NFSUndercover(GSRendererHW& r, int& skip)
r.m_vt.m_max.p.y = r.m_r.w;
r.m_vt.m_max.t.x = r.m_r.z;
r.m_vt.m_max.t.y = r.m_r.w;
r.m_vertex.head = r.m_vertex.tail = r.m_vertex.next = 2;
r.m_index.tail = 2;
r.m_vertex->head = r.m_vertex->tail = r.m_vertex->next = 2;
r.m_index->tail = 2;
skip = 79;
}
else
@@ -840,7 +840,7 @@ bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, int& skip)
if (dst)
{
float dc = r.m_vertex.buff[1].XYZ.Z;
float dc = r.m_vertex->buff[1].XYZ.Z;
g_gs_device->ClearDepth(dst->m_texture, dc * std::exp2(-32.0f));
}
}
@@ -858,9 +858,9 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip)
if (RPRIM->TME && RTEX0.TW == 3 && RTEX0.TH == 3 && RTEX0.PSM == 0 && RFRAME.FBMSK == 0x00FFFFFF && RFRAME.FBW == 8 && r.PCRTCDisplays.GetResolution().x > 512)
{
// Check we are drawing stripes
for (u32 i = 1; i < r.m_vertex.tail; i+=2)
for (u32 i = 1; i < r.m_vertex->tail; i+=2)
{
int value = (((r.m_vertex.buff[i].XYZ.X - r.m_vertex.buff[i - 1].XYZ.X) + 8) >> 4);
int value = (((r.m_vertex->buff[i].XYZ.X - r.m_vertex->buff[i - 1].XYZ.X) + 8) >> 4);
if (value != 32)
return false;
}
@@ -872,18 +872,18 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip)
for (int vert = 32; vert < 40; vert+=2)
{
r.m_vertex.buff[vert].XYZ.X = context->XYOFFSET.OFX + (((vert * 16) << 4) - 8);
r.m_vertex.buff[vert].XYZ.Y = context->XYOFFSET.OFY;
r.m_vertex.buff[vert].U = (vert * 16) << 4;
r.m_vertex.buff[vert].V = 0;
r.m_vertex.buff[vert+1].XYZ.X = context->XYOFFSET.OFX + ((((vert * 16) + 32) << 4) - 8);
r.m_vertex.buff[vert+1].XYZ.Y = context->XYOFFSET.OFY + (r.PCRTCDisplays.GetResolution().y << 4) + 8;
r.m_vertex.buff[vert+1].U = ((vert * 16) + 32) << 4;
r.m_vertex.buff[vert+1].V = r.PCRTCDisplays.GetResolution().y << 4;
r.m_vertex->buff[vert].XYZ.X = context->XYOFFSET.OFX + (((vert * 16) << 4) - 8);
r.m_vertex->buff[vert].XYZ.Y = context->XYOFFSET.OFY;
r.m_vertex->buff[vert].U = (vert * 16) << 4;
r.m_vertex->buff[vert].V = 0;
r.m_vertex->buff[vert+1].XYZ.X = context->XYOFFSET.OFX + ((((vert * 16) + 32) << 4) - 8);
r.m_vertex->buff[vert+1].XYZ.Y = context->XYOFFSET.OFY + (r.PCRTCDisplays.GetResolution().y << 4) + 8;
r.m_vertex->buff[vert+1].U = ((vert * 16) + 32) << 4;
r.m_vertex->buff[vert+1].V = r.PCRTCDisplays.GetResolution().y << 4;
}
/*r.m_vertex.head = r.m_vertex.tail = r.m_vertex.next = 2;
r.m_index.tail = 2;*/
/*r.m_vertex->head = r.m_vertex->tail = r.m_vertex->next = 2;
r.m_index->tail = 2;*/
r.m_vt.m_max.p.x = r.m_r.z;
r.m_vt.m_max.p.y = r.m_r.w;
@@ -917,7 +917,7 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip)
// This is the giant dither-like depth buffer. We need this on the CPU *and* the GPU for textures which are
// rendered on both.
if (context->FRAME.FBW == 8 && r.m_index.tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1)
if (context->FRAME.FBW == 8 && r.m_index->tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1)
{
r.SwPrimRender(r, false, false);
return false;
@@ -957,8 +957,8 @@ bool GSHwHack::GSC_MetalGearSolid3(GSRendererHW& r, int& skip)
GL_INS("OI_MetalGearSolid3(): %x -> %x, %dx%d, subtract %d", RFBP, RFBP + (RFBW / 2), r.m_r.width(), r.m_r.height(),
w_sub);
for (u32 i = 0; i < r.m_vertex.next; i++)
r.m_vertex.buff[i].XYZ.X -= w_sub_fp;
for (u32 i = 0; i < r.m_vertex->next; i++)
r.m_vertex->buff[i].XYZ.X -= w_sub_fp;
// No point adjusting the scissor, it just ends up expanding out anyway.. but we do have to fix up the draw rect.
r.m_r -= GSVector4i(w_sub);
@@ -971,7 +971,7 @@ bool GSHwHack::GSC_Turok(GSRendererHW& r, int& skip)
// Since we can't look in to the future to check this, the options are either rearrange all the pages in a target when the width changes
// (very slow, could break a ton of stuff which stores different things in the alpha channel), or this. I choose this.
if (r.m_index.tail == 6 && RPRIM->PRIM == 4 && !RTME && RFBMSK == 0x00FFFFFF && floor(r.m_vt.m_max.p.x) == 512 && r.m_env.CTXT[r.m_backed_up_ctx].FRAME.FBW == 10 && RFRAME.FBW == 8 && RFPSM == PSMCT32 && RTEST.ATE && RTEST.ATST == ATST_GEQUAL)
if (r.m_index->tail == 6 && RPRIM->PRIM == 4 && !RTME && RFBMSK == 0x00FFFFFF && floor(r.m_vt.m_max.p.x) == 512 && r.m_env.CTXT[r.m_backed_up_ctx].FRAME.FBW == 10 && RFRAME.FBW == 8 && RFPSM == PSMCT32 && RTEST.ATE && RTEST.ATST == ATST_GEQUAL)
{
int num_pages = r.m_cached_ctx.FRAME.FBW * ((floor(r.m_vt.m_max.p.y) + 31) / 32);
r.m_cached_ctx.FRAME.FBW = 10;
@@ -988,7 +988,7 @@ bool GSHwHack::GSC_Turok(GSRendererHW& r, int& skip)
bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
const u32 n_vertices = r.m_vertex.next;
const u32 n_vertices = r.m_vertex->next;
const int w = r.m_r.width();
const int h = r.m_r.height();
const bool is_copy = !r.PRIM->ABE || (
@@ -1021,7 +1021,7 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds
const u32 FBP = r.m_cached_ctx.FRAME.Block();
const u32 FBW = r.m_cached_ctx.FRAME.FBW;
GL_INS("PointListPalette - m_r = <%d, %d => %d, %d>, n_vertices = %u, FBP = 0x%x, FBW = %u", r.m_r.x, r.m_r.y, r.m_r.z, r.m_r.w, n_vertices, FBP, FBW);
const GSVertex* RESTRICT v = r.m_vertex.buff;
const GSVertex* RESTRICT v = r.m_vertex->buff;
const int ox(r.m_context->XYOFFSET.OFX);
const int oy(r.m_context->XYOFFSET.OFY);
for (size_t i = 0; i < n_vertices; ++i)
@@ -1219,9 +1219,9 @@ bool GSHwHack::OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GST
buffer to adapt the page width properly.
*/
const GSVertex* v = &r.m_vertex.buff[0];
const GSVertex* v = &r.m_vertex->buff[0];
if (ds && r.m_vertex.next == 2 && !RPRIM->TME && RFRAME.FBW == 10 && v->XYZ.Z == 0 && RTEST.ZTST == ZTST_ALWAYS)
if (ds && r.m_vertex->next == 2 && !RPRIM->TME && RFRAME.FBW == 10 && v->XYZ.Z == 0 && RTEST.ZTST == ZTST_ALWAYS)
{
GL_INS("OI_ArTonelico2");
g_gs_device->ClearDepth(ds, 0.0f);

File diff suppressed because it is too large Load Diff

View File

@@ -39,21 +39,21 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
GSRasterizerData data;
GSScanlineGlobalData& gd = data.global;
hw.m_sw_vertex_buffer.resize(((hw.m_vertex.next + 1) & ~1));
hw.m_sw_vertex_buffer.resize(((hw.m_vertex->next + 1) & ~1));
data.primclass = vt.m_primclass;
data.buff = nullptr;
data.vertex = hw.m_sw_vertex_buffer.data();
data.vertex_count = hw.m_vertex.next;
data.index = hw.m_index.buff;
data.index_count = hw.m_index.tail;
data.vertex_count = hw.m_vertex->next;
data.index = hw.m_index->buff;
data.index_count = hw.m_index->tail;
data.scanmsk_value = env.SCANMSK.MSK;
// Skip per pixel division if q is constant.
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !vt.m_eq.q.
// If you have both GS_SPRITE_CLASS && vt.m_eq.q, it will depends on the first part of the 'OR'.
const u32 q_div = !hw.IsMipMapActive() && ((vt.m_eq.q && vt.m_min.t.z != 1.0f) || (!vt.m_eq.q && vt.m_primclass == GS_SPRITE_CLASS));
GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next);
GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex->buff, hw.m_vertex->next);
GSVector4i scissor = context->scissor.in;
GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil())).rintersect(scissor);
@@ -524,12 +524,12 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
const u32 ofx = context->XYOFFSET.OFX;
for (int i = 0, j = hw.m_vertex.tail; i < j; i++)
for (int i = 0, j = hw.m_vertex->tail; i < j; i++)
{
#if _M_SSE >= 0x501
if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
if ((((hw.m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
#else
if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
if ((((hw.m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
#endif
{
gd.sel.notest = 0;

View File

@@ -1788,8 +1788,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
(GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp >= 16)) && // Channel shuffles or non indexed lookups.
t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/)
{
const u32 end_block = GSLocalMemory::GetEndBlockAddress(bp, TEX0.TBW, TEX0.PSM, r);
const u32 adj_bp = (end_block < t->m_TEX0.TBP0 && t->UnwrappedEndBlock() > GS_MAX_BLOCKS) ? (bp + GS_MAX_BLOCKS) : bp;
u32 rt_tbw = std::max(1U, t->m_TEX0.TBW);
u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % rt_tbw;
u32 horz_page_offset = ((adj_bp - t->m_TEX0.TBP0) >> 5) % rt_tbw;
if (GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && bw != rt_tbw && block_boundary_rect.height() > GSLocalMemory::m_psm[psm].pgs.y)
continue;
@@ -1802,7 +1804,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) ||
(t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y))))
{
DbgCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
DbgCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", adj_bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
continue;
}
// Keep note that 2 bw is basically 1 normal page, as bw is in 64 pixels, and 8bit pages are 128 pixels wide, aka 2 bw.
@@ -1814,7 +1816,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
((GSLocalMemory::m_psm[psm].bpp == 32) ? bw : ((bw + 1) / 2)) <= t->m_TEX0.TBW) &&
!(((GSLocalMemory::m_psm[psm].bpp == 32) ? bw : ((bw + 1) / 2)) == rt_tbw)))))
{
DbgCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
DbgCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", adj_bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
continue;
}
else if (!possible_shuffle && GSLocalMemory::m_psm[psm].bpp <= 8 && TEX0.TBW == 1)
@@ -1852,16 +1854,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
else // Formats are not compatible for normal draws, only shuffles.
continue;
}
if (bp > t->m_TEX0.TBP0)
if (adj_bp > t->m_TEX0.TBP0)
{
if (!region.HasEither() && GSLocalMemory::m_psm[psm].bpp == 32 && (t->m_TEX0.TBW - (((bp - t->m_TEX0.TBP0) >> 5) % rt_tbw)) < static_cast<u32>((block_boundary_rect.width() + 63) / 64))
if (!region.HasEither() && GSLocalMemory::m_psm[psm].bpp == 32 && (t->m_TEX0.TBW - (((adj_bp - t->m_TEX0.TBP0) >> 5) % rt_tbw)) < static_cast<u32>((block_boundary_rect.width() + 63) / 64))
{
DbgCon.Warning("Bad alignmenet");
continue;
}
// Make sure it's inside if not a shuffle, sometimes valid areas can get messy, like TOCA Race Driver 2 where it goes over to 480, but it's rounded up to 512 in the shuffle.
if (!possible_shuffle && !t->Inside(bp, bw, psm, block_boundary_rect))
if (!possible_shuffle && !t->Inside(adj_bp, bw, psm, block_boundary_rect))
continue;
GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (psm & 0x7) != PSMCT16) ? block_boundary_rect : rect;
@@ -1871,7 +1873,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
// Hitman Blood Money is an example of this in the theatre.
const u32 rt_tbw = (possible_shuffle || bw == 1 || GSUtil::GetChannelMask(psm) != 0x8 || frame.FBW <= bw || frame.FBW == t->m_TEX0.TBW || bw == t->m_TEX0.TBW) ? t->m_TEX0.TBW : frame.FBW;
const bool can_translate = CanTranslate(bp, bw, src_psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, rt_tbw);
const bool can_translate = CanTranslate(adj_bp, bw, src_psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, rt_tbw);
if (can_translate)
{
const bool swizzle_match = GSLocalMemory::m_psm[src_psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
@@ -1881,7 +1883,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (swizzle_match)
{
rect = TranslateAlignedRectByPage(t->m_TEX0.TBP0, t->m_end_block, rt_tbw, t->m_TEX0.PSM, t->m_valid, bp, src_psm, bw, new_rect);
rect = TranslateAlignedRectByPage(t->m_TEX0.TBP0, t->m_end_block, rt_tbw, t->m_TEX0.PSM, t->m_valid, adj_bp, src_psm, bw, new_rect);
rect.x -= new_rect.x;
rect.y -= new_rect.y;
}
@@ -1901,7 +1903,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
}
rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), src_psm, bw, new_rect);
rect = TranslateAlignedRectByPage(t, adj_bp & ~((1 << 5) - 1), src_psm, bw, new_rect);
rect.x -= new_rect.x & ~(page_size.x - 1);
rect.y -= new_rect.y & ~(page_size.y - 1);
}
@@ -1933,7 +1935,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
}
else
{
SurfaceOffset so = ComputeSurfaceOffset(bp, bw, src_psm, new_rect, t);
SurfaceOffset so = ComputeSurfaceOffset(adj_bp, bw, src_psm, new_rect, t);
if (!so.is_valid && t->Wraps())
{
// Improves Beyond Good & Evil shadow.
@@ -2692,10 +2694,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
const u32 end_block = GSLocalMemory::GetEndBlockAddress(bp, TEX0.TBW, TEX0.PSM, GSVector4i(0, size.y, size.x, size.y + 1));
const u32 bp_adj = (end_block < t->m_TEX0.TBP0 && t->UnwrappedEndBlock() > GS_MAX_BLOCKS) ? (bp + GS_MAX_BLOCKS) : bp;
const bool half_buffer_match = GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && TEX0.TBW == t->m_TEX0.TBW && TEX0.PSM == t->m_TEX0.PSM &&
bp == GSLocalMemory::GetStartBlockAddress(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, GSVector4i(0, size.y, size.x, size.y + 1));
// Make sure the target is inside the texture
if (t->m_TEX0.TBP0 <= bp && bp <= t->m_end_block && (half_buffer_match || t->Inside(bp, TEX0.TBW, TEX0.PSM, GSVector4i::loadh(size))))
if (t->m_TEX0.TBP0 <= bp_adj && bp_adj <= t->UnwrappedEndBlock() && (half_buffer_match || t->Inside(bp_adj, TEX0.TBW, TEX0.PSM, GSVector4i::loadh(size))))
{
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
continue;
@@ -4306,23 +4310,27 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo
}
}
}*/
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw)
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw, u32 fb_mask, bool ignore_exact)
{
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24);
for (int type = 0; type < 2; type++)
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24) || (fb_mask & 0xFF000000);
for (int type = 0; type < (ignore_exact ? 1 : 2); type++)
{
auto& list = m_dst[type];
for (auto i = list.begin(); i != list.end();)
{
Target* const t = *i;
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp))
if ((ignore_exact && start_bp == t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp)))
{
++i;
continue;
}
const bool compatible_fmt = GSUtil::HasCompatibleBits(t->m_TEX0.PSM, write_psm);
const bool compatible_width = std::max(t->m_TEX0.TBW, 1U) == std::max(write_bw, 1U);
// If not fully contained but they are aligned and or clean, just dirty the area.
if (type != DepthStencil && start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp))
if ((type != DepthStencil || !compatible_fmt || !compatible_width) && start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp))
{
const u32 offset = (std::abs(static_cast<int>(start_bp - t->m_TEX0.TBP0)) >> 5) % std::max(1U, t->m_TEX0.TBW);
const GSVector4i dirty_rect = t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(t->m_valid);
@@ -4338,7 +4346,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm);
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, invalidate_r, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
}
@@ -4368,7 +4376,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
t->m_valid_alpha_low &= preserve_alpha;
t->m_valid_alpha_high &= preserve_alpha;
t->m_valid_rgb = false;
t->m_valid_rgb &= (fb_mask & 0x00FFFFFF) != 0;
// Don't keep partial depth buffers around.
if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil)
@@ -4390,6 +4398,16 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
delete t;
continue;
}
else if (ignore_exact && GSUtil::HasCompatibleBits(t->m_TEX0.PSM, write_psm))
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, t->m_valid, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
t->m_valid_rgb |= !!(mask._u32 & 0x7);
t->m_valid_alpha_low |= mask.c.a;
t->m_valid_alpha_high |= mask.c.a;
}
GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, GSUtil::GetPSMName(t->m_TEX0.PSM));
++i;

View File

@@ -32,8 +32,18 @@ public:
constexpr static bool CheckOverlap(const u32 a_bp, const u32 a_bp_end, const u32 b_bp, const u32 b_bp_end) noexcept
{
const bool valid = a_bp <= a_bp_end && b_bp <= b_bp_end;
const bool overlap = a_bp <= b_bp_end && a_bp_end >= b_bp;
u32 b_bp_start_synced = b_bp;
u32 b_bp_end_synced = b_bp_end;
// Check for wrapping
if (a_bp_end > GS_MAX_BLOCKS && b_bp_end < a_bp)
{
b_bp_start_synced += GS_MAX_BLOCKS;
b_bp_end_synced += GS_MAX_BLOCKS;
}
const bool valid = a_bp <= a_bp_end && b_bp_start_synced <= b_bp_end_synced;
const bool overlap = a_bp <= b_bp_end_synced && a_bp_end >= b_bp_start_synced;
return valid && overlap;
}
@@ -522,7 +532,7 @@ public:
bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits<u32>::max(), bool move_front = true);
bool Has32BitTarget(u32 bp);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1, u32 fb_mask = 0x00000000, bool ignore_exact = false);
void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);

View File

@@ -320,14 +320,14 @@ void GSRendererSW::RewriteVerticesIfSTOverflow()
constexpr int n = GSUtil::GetClassVertexCount(primclass);
// Make sure the copy buffer is large enough.
while (m_vertex.maxcount < m_index.tail)
while (m_vertex->maxcount < m_index->tail)
GrowVertexBuffer();
GSVertex* RESTRICT vertex = m_vertex.buff;
GSVertex* RESTRICT vertex_copy = m_vertex.buff_copy;
u16* RESTRICT index = m_index.buff;
GSVertex* RESTRICT vertex = m_vertex->buff;
GSVertex* RESTRICT vertex_copy = m_vertex->buff_copy;
u16* RESTRICT index = m_index->buff;
for (int i = 0; i < static_cast<int>(m_index.tail); i += n)
for (int i = 0; i < static_cast<int>(m_index->tail); i += n)
{
GSVector4 stcq[n];
@@ -381,18 +381,18 @@ void GSRendererSW::RewriteVerticesIfSTOverflow()
}
// Swap the buffers and fix the counts.
std::swap(m_vertex.buff, m_vertex.buff_copy);
m_vertex.head = m_vertex.next = m_vertex.tail = m_index.tail;
std::swap(m_vertex->buff, m_vertex->buff_copy);
m_vertex->head = m_vertex->next = m_vertex->tail = m_index->tail;
// Recalculate ST min/max/eq in the vertex trace.
GSVector4 tmin = GSVector4::cxpr(FLT_MAX);
GSVector4 tmax = GSVector4::cxpr(-FLT_MAX);
for (int i = 0; i < static_cast<int>(m_index.tail); i += n)
for (int i = 0; i < static_cast<int>(m_index->tail); i += n)
{
for (int j = 0; j < n; j++)
{
GSVector4 stcq = GSVector4::cast(GSVector4i(m_vertex.buff[i + j].m[0]));
const float Q = (primclass == GS_SPRITE_CLASS) ? stcq.w : m_vertex.buff[i + 1].RGBAQ.Q;
GSVector4 stcq = GSVector4::cast(GSVector4i(m_vertex->buff[i + j].m[0]));
const float Q = (primclass == GS_SPRITE_CLASS) ? stcq.w : m_vertex->buff[i + 1].RGBAQ.Q;
stcq = (stcq / Q).xyzw(stcq);
tmin = tmin.min(stcq);
@@ -451,11 +451,11 @@ void GSRendererSW::Draw()
SharedData* sd = static_cast<SharedData*>(data.get());
sd->primclass = m_vt.m_primclass;
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex->next + 1) & ~1) + sizeof(u32) * m_index->tail, 64);
sd->vertex = (GSVertexSW*)sd->buff;
sd->vertex_count = m_vertex.next;
sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
sd->index_count = m_index.tail;
sd->vertex_count = m_vertex->next;
sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex->next + 1) & ~1));
sd->index_count = m_index->tail;
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
// skip per pixel division if q is constant.
@@ -463,9 +463,9 @@ void GSRendererSW::Draw()
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR'
u32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex->buff, m_vertex->next);
std::memcpy(sd->index, m_index.buff, sizeof(u16) * m_index.tail);
std::memcpy(sd->index, m_index->buff, sizeof(u16) * m_index->tail);
GSVector4i scissor = context->scissor.in;
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().upld(m_vt.m_max.p.floor())) + GSVector4i(0, 0, 1, 1); // right/bottom should be exclusive so +1
@@ -485,12 +485,12 @@ void GSRendererSW::Draw()
{
int n = GSUtil::GetVertexCount(PRIM->PRIM);
for (u32 i = 0, j = 0; i < m_index.tail; i += n, j++)
for (u32 i = 0, j = 0; i < m_index->tail; i += n, j++)
{
for (int k = 0; k < n; k++)
{
GSVertex* v = &m_vertex.buff[m_index.buff[i + k]];
GSVertex* vn = &m_vertex.buff[m_index.buff[i + n - 1]];
GSVertex* v = &m_vertex->buff[m_index->buff[i + k]];
GSVertex* vn = &m_vertex->buff[m_index->buff[i + n - 1]];
fprintf(s_fp, "%d:%d %f %f %f %f\n",
j, k,
@@ -1505,12 +1505,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
u32 ofx = context->XYOFFSET.OFX;
for (int i = 0, j = m_vertex.tail; i < j; i++)
for (int i = 0, j = m_vertex->tail; i < j; i++)
{
#if _M_SSE >= 0x501
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
if ((((m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
#else
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
if ((((m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
#endif
{
gd.sel.notest = 0;

View File

@@ -481,72 +481,26 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data());
DevCon.WriteLn("%u vulkan queue families", queue_family_count);
std::vector<uint32_t> queue_family_users(queue_family_count, 0);
// Find graphics and present queues.
m_graphics_queue_family_index = queue_family_count;
m_present_queue_family_index = queue_family_count;
u32 present_queue_index = 0;
m_spin_queue_family_index = queue_family_count;
u32 spin_queue_index = 0;
// Graphics Queue
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)
VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
if (graphics_supported)
{
m_graphics_queue_family_index = i;
queue_family_users[i]++;
break;
// Quit now, no need for a present queue.
if (!surface)
{
break;
}
}
}
// Spinwait Queue
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueCount == queue_family_users[i])
continue;
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
continue;
if (queue_family_properties[i].timestampValidBits == 0)
continue; // We need timing
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
if (surface)
{
m_spin_queue_family_index = i;
break;
}
else if (m_spin_queue_family_index == queue_family_count)
m_spin_queue_family_index = i;
}
if (m_spin_queue_family_index != queue_family_count)
{
spin_queue_index = queue_family_users[m_spin_queue_family_index];
queue_family_users[m_spin_queue_family_index]++;
m_spin_queue_is_graphics_queue = false;
}
else
{
// No spare queue? Try the graphics queue.
if ((queue_family_properties[m_graphics_queue_family_index].queueFlags & VK_QUEUE_COMPUTE_BIT) &&
(queue_family_properties[m_graphics_queue_family_index].timestampValidBits != 0))
{
m_spin_queue_family_index = m_graphics_queue_family_index;
spin_queue_index = 0;
m_spin_queue_is_graphics_queue = true;
}
else
m_spin_queue_is_graphics_queue = false;
}
// Present Queue
if (surface)
{
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueCount == queue_family_users[i])
continue;
VkBool32 present_supported;
VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported);
if (res != VK_SUCCESS)
@@ -555,48 +509,35 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
return false;
}
if (!present_supported)
continue;
// Perfer aync compute queue
if ((queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) &&
!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
{
m_present_queue_family_index = i;
break;
}
else if (m_present_queue_family_index == queue_family_count)
m_present_queue_family_index = i;
}
if (m_present_queue_family_index != queue_family_count)
{
present_queue_index = queue_family_users[m_present_queue_family_index];
queue_family_users[m_present_queue_family_index]++;
}
else
{
// No spare queue? Try the graphics queue.
VkBool32 present_supported;
VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, m_graphics_queue_family_index, surface, &present_supported);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
return false;
}
if (present_supported)
{
m_present_queue_family_index = m_graphics_queue_family_index;
present_queue_index = 0;
m_present_queue_family_index = i;
}
// Prefer one queue family index that does both graphics and present.
if (graphics_supported && present_supported)
{
break;
}
}
}
// Swap spin and present to simplify queue priorities logic.
if (!m_spin_queue_is_graphics_queue && m_present_queue_family_index == m_spin_queue_family_index)
std::swap(spin_queue_index, present_queue_index);
for (uint32_t i = 0; i < queue_family_count; i++)
{
// Pick a queue for spinning
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
continue; // We need compute
if (queue_family_properties[i].timestampValidBits == 0)
continue; // We need timing
const bool queue_is_used = i == m_graphics_queue_family_index || i == m_present_queue_family_index;
if (queue_is_used && m_spin_queue_family_index != queue_family_count)
continue; // Found a non-graphics queue to use
spin_queue_index = 0;
m_spin_queue_family_index = i;
if (queue_is_used && queue_family_properties[i].queueCount > 1)
spin_queue_index = 1;
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
break; // Async compute queue, definitely pick this one
}
if (m_graphics_queue_family_index == queue_family_count)
{
Console.Error("VK: Failed to find an acceptable graphics queue.");
@@ -614,16 +555,14 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
device_info.flags = 0;
device_info.queueCreateInfoCount = 0;
// Low priority for the spin queue
static constexpr float queue_priorities[] = {1.0f, 1.0f, 0.0f};
static constexpr float queue_priorities[] = {1.0f, 0.0f}; // Low priority for the spin queue
std::array<VkDeviceQueueCreateInfo, 3> queue_infos;
VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
graphics_queue_info.pNext = nullptr;
graphics_queue_info.flags = 0;
graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index;
graphics_queue_info.queueCount = queue_family_users[m_graphics_queue_family_index];
graphics_queue_info.queueCount = 1;
graphics_queue_info.pQueuePriorities = queue_priorities;
if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
@@ -633,19 +572,19 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
present_queue_info.pNext = nullptr;
present_queue_info.flags = 0;
present_queue_info.queueFamilyIndex = m_present_queue_family_index;
present_queue_info.queueCount = queue_family_users[m_present_queue_family_index];
present_queue_info.queueCount = 1;
present_queue_info.pQueuePriorities = queue_priorities;
}
if (m_spin_queue_family_index == m_graphics_queue_family_index)
{
if (spin_queue_index == 1)
graphics_queue_info.pQueuePriorities = queue_priorities + 1;
if (spin_queue_index != 0)
graphics_queue_info.queueCount = 2;
}
else if (m_spin_queue_family_index == m_present_queue_family_index)
{
if (spin_queue_index == 1)
queue_infos[1].pQueuePriorities = queue_priorities + 1;
if (spin_queue_index != 0)
queue_infos[1].queueCount = 2; // present queue
}
else if (m_spin_queue_family_index != queue_family_count)
{
@@ -655,7 +594,7 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
spin_queue_info.flags = 0;
spin_queue_info.queueFamilyIndex = m_spin_queue_family_index;
spin_queue_info.queueCount = 1;
spin_queue_info.pQueuePriorities = queue_priorities + 2;
spin_queue_info.pQueuePriorities = queue_priorities + 1;
}
device_info.pQueueCreateInfos = queue_infos.data();
@@ -744,11 +683,13 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue);
if (surface)
{
vkGetDeviceQueue(m_device, m_present_queue_family_index, present_queue_index, &m_present_queue);
vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
}
m_spinning_supported = m_spin_queue_family_index != queue_family_count &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
m_device_properties.limits.timestampPeriod > 0;
m_spin_queue_is_graphics_queue =
m_spin_queue_family_index == m_graphics_queue_family_index && spin_queue_index == 0;
m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
@@ -1348,24 +1289,8 @@ void GSDeviceVK::SubmitCommandBuffer(VKSwapChain* present_swap_chain)
if (present_swap_chain)
{
// vkQueuePresentKHR on NVidia dosn't seem to properly wait on the passed semaphore, causing artifacts.
// OBS capture with BPM encouters issues, but can apparently occur on the presented image aswell.
// Instead, wait on the RenderingFinished semaphore with vkQueueSubmit.
const uint32_t present_wait_bits = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
const VkSubmitInfo submit_present_wait_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 1,
present_swap_chain->GetRenderingFinishedSemaphorePtr(), &present_wait_bits, 0,
nullptr, 1, present_swap_chain->GetPresentReadySemaphorePtr()};
res = vkQueueSubmit(m_present_queue, 1, &submit_present_wait_info, nullptr);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
m_last_submit_failed = true;
return;
}
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1,
present_swap_chain->GetPresentReadySemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetCurrentImageIndexPtr(), nullptr};
present_swap_chain->ResetImageAcquireResult();

View File

@@ -490,18 +490,6 @@ bool VKSwapChain::CreateSwapChain()
sema.available_semaphore = VK_NULL_HANDLE;
return false;
}
res = vkCreateSemaphore(
GSDeviceVK::GetInstance()->GetDevice(), &semaphore_info, nullptr, &sema.present_ready_semaphore);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: ");
vkDestroySemaphore(GSDeviceVK::GetInstance()->GetDevice(), sema.rendering_finished_semaphore, nullptr);
vkDestroySemaphore(GSDeviceVK::GetInstance()->GetDevice(), sema.available_semaphore, nullptr);
sema.rendering_finished_semaphore = VK_NULL_HANDLE;
sema.available_semaphore = VK_NULL_HANDLE;
return false;
}
}
return true;
@@ -517,8 +505,6 @@ void VKSwapChain::DestroySwapChainImages()
m_images.clear();
for (auto& it : m_semaphores)
{
if (it.present_ready_semaphore != VK_NULL_HANDLE)
vkDestroySemaphore(GSDeviceVK::GetInstance()->GetDevice(), it.present_ready_semaphore, nullptr);
if (it.rendering_finished_semaphore != VK_NULL_HANDLE)
vkDestroySemaphore(GSDeviceVK::GetInstance()->GetDevice(), it.rendering_finished_semaphore, nullptr);
if (it.available_semaphore != VK_NULL_HANDLE)

View File

@@ -64,14 +64,6 @@ public:
{
return &m_semaphores[m_current_semaphore].rendering_finished_semaphore;
}
__fi VkSemaphore GetPresentReadySemaphore() const
{
return m_semaphores[m_current_semaphore].present_ready_semaphore;
}
__fi const VkSemaphore* GetPresentReadySemaphorePtr() const
{
return &m_semaphores[m_current_semaphore].present_ready_semaphore;
}
VkFormat GetTextureFormat() const;
VkResult AcquireNextImage();
@@ -100,7 +92,6 @@ private:
{
VkSemaphore available_semaphore;
VkSemaphore rendering_finished_semaphore;
VkSemaphore present_ready_semaphore;
};
WindowInfo m_window_info;

View File

@@ -1923,7 +1923,6 @@ Pcsx2Config::Pcsx2Config()
{
bitset = 0;
// Set defaults for fresh installs / reset settings
McdFolderAutoManage = true;
EnablePatches = true;
EnableFastBoot = true;
EnableRecordingTools = true;
@@ -1979,7 +1978,6 @@ void Pcsx2Config::LoadSaveCore(SettingsWrapper& wrap)
SettingsWrapBitBool(HostFs);
SettingsWrapBitBool(BackupSavestate);
SettingsWrapBitBool(McdFolderAutoManage);
SettingsWrapBitBool(WarnAboutUnsafeSettings);

View File

@@ -611,7 +611,7 @@ void FileMcd_EmuOpen()
Mcd::impl.Open();
Mcd::implFolder.SetFiltering(EmuConfig.McdFolderAutoManage);
Mcd::implFolder.SetFiltering(true);
Mcd::implFolder.Open();
}
@@ -823,7 +823,7 @@ int FileMcd_ReIndex(uint port, uint slot, const std::string& filter)
// return Mcd::impl.ReIndex( combinedSlot, filter );
// break;
case MemoryCardType::Folder:
if (!Mcd::implFolder.ReIndex(combinedSlot, EmuConfig.McdFolderAutoManage, filter))
if (!Mcd::implFolder.ReIndex(combinedSlot, true, filter))
return -1;
break;
default:
@@ -930,7 +930,7 @@ std::vector<AvailableMcdInfo> FileMcd_GetAvailableCards(bool include_in_use_card
Pcsx2Config::McdOptions config;
config.Enabled = true;
config.Type = MemoryCardType::Folder;
sourceFolderMemoryCard.Open(fd.FileName, config, (8 * 1024 * 1024) / FolderMemoryCard::ClusterSize, EmuConfig.McdFolderAutoManage, "");
sourceFolderMemoryCard.Open(fd.FileName, config, (8 * 1024 * 1024) / FolderMemoryCard::ClusterSize, true, "");
mcds.push_back({std::move(basename), std::move(fd.FileName), fd.ModificationTime,
MemoryCardType::Folder, MemoryCardFileType::Unknown, 0u, sourceFolderMemoryCard.IsFormatted()});

View File

@@ -2993,8 +2993,6 @@ void VMManager::CheckForMemoryCardConfigChanges(const Pcsx2Config& old_config)
}
}
changed |= (EmuConfig.McdFolderAutoManage != old_config.McdFolderAutoManage);
if (!changed)
return;