diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index f747c0e9e3..86346c8eea 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -105,18 +105,18 @@ GSState::GSState() s_n = 0; s_transfer_n = 0; + memset(&m_v, 0, sizeof(m_v)); - memset(&m_vertex, 0, sizeof(m_vertex)); - memset(&m_index, 0, sizeof(m_index)); memset(m_mem.m_vm8, 0, m_mem.m_vmsize); m_v.RGBAQ.Q = 1.0f; - GrowVertexBuffer(); - PRIM = &m_env.PRIM; //CSR->rREV = 0x20; m_env.PRMODECONT.AC = 1; + + ResetDrawBuffers(); + Reset(false); ResetHandlers(); @@ -124,12 +124,15 @@ GSState::GSState() GSState::~GSState() { - if (m_vertex.buff) - _aligned_free(m_vertex.buff); - if (m_vertex.buff_copy) - _aligned_free(m_vertex.buff_copy); - if (m_index.buff) - _aligned_free(m_index.buff); + for (int i = 0; i < MAX_DRAW_BUFFERS; i++) + { + if (m_index_buffers[i].buff) + _aligned_free(m_index_buffers[i].buff); + if (m_vertex_buffers[i].buff) + _aligned_free(m_vertex_buffers[i].buff); + if (m_vertex_buffers[i].buff_copy) + _aligned_free(m_vertex_buffers[i].buff_copy); + } if (m_draw_vertex.buff) _aligned_free(m_draw_vertex.buff); if (m_draw_index.buff) @@ -191,10 +194,10 @@ void GSState::Reset(bool hardware_reset) UpdateScissor(); - m_vertex.head = 0; - m_vertex.tail = 0; - m_vertex.next = 0; - m_index.tail = 0; + m_vertex->head = 0; + m_vertex->tail = 0; + m_vertex->next = 0; + m_index->tail = 0; m_scanmask_used = 0; m_texflush_flag = false; m_channel_shuffle = false; @@ -203,6 +206,8 @@ void GSState::Reset(bool hardware_reset) memcpy(&m_prev_env, &m_env, sizeof(m_prev_env)); + ResetDrawBuffers(); + m_perfmon_draw.Reset(); m_perfmon_frame.Reset(); } @@ -234,8 +239,422 @@ void GSState::SetPrimHandlers() #undef SetHandlerXYZ } -void GSState::ResetHandlers() +static constexpr u32 NumIndicesForPrim(u32 prim) { + switch (prim) + { + case GS_POINTLIST: + case GS_INVALID: + return 1; + case GS_LINELIST: + case GS_SPRITE: + case GS_LINESTRIP: + return 2; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case 
GS_TRIANGLEFAN: + return 3; + default: + return 0; + } +} + +static constexpr u32 MaxVerticesForPrim(u32 prim) +{ + switch (prim) + { + // Four indices per 1 vertex. + case GS_POINTLIST: + case GS_INVALID: + + // Indices are shifted left by 2 to form quads. + case GS_LINELIST: + case GS_LINESTRIP: + return (std::numeric_limits::max() / 4) - 4; + + // Four indices per two vertices. + case GS_SPRITE: + return (std::numeric_limits::max() / 2) - 2; + + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + default: + return (std::numeric_limits::max() - 3); + } +} + + +void GSState::ResetDrawBufferIdx() +{ + for (u32 i = 1; i < m_used_buffers_idx; i++) + { + if (m_index_buffers[i].tail) + FlushDraw(GSFlushReason::CONTEXTCHANGE); + + // There can be situations like VSync where it won't purge the draws, this is bad for us! + if (i == m_current_buffer_idx) + { + memcpy(m_vertex_buffers[0].buff, m_vertex_buffers[i].buff, sizeof(GSVertex) * m_vertex_buffers[i].tail); + + m_vertex_buffers[0].head = m_vertex_buffers[i].head; + m_vertex_buffers[0].tail = m_vertex_buffers[i].tail; + m_vertex_buffers[0].next = m_vertex_buffers[i].next; + + if (m_vertex_buffers[0].tail != 0) + { + memcpy(m_vertex_buffers[0].xy, m_vertex_buffers[i].xy, sizeof(m_vertex_buffers[i].xy)); + m_vertex_buffers[0].xyhead = m_vertex_buffers[i].xyhead; + m_vertex_buffers[0].xy_tail = m_vertex_buffers[i].xy_tail; + } + else + { + m_vertex_buffers[0].xy_tail = 0; + } + + memcpy(&m_env_buffers[0], &m_env_buffers[i], sizeof(m_env_buffers[i])); + } + + memset(&m_env_buffers[i], 0, sizeof(GSDrawBufferEnv)); + m_vertex_buffers[i].head = m_vertex_buffers[i].tail = m_vertex_buffers[i].next = 0; + } + + m_env_buffers[0].draw_rect = GSVector4i::zero(); + m_index = &m_index_buffers[0]; + m_vertex = &m_vertex_buffers[0]; + m_used_buffers_idx = 1; + m_current_buffer_idx = 0; + m_backed_up_ctx = -1; + m_dirty_gs_regs = 0; + + UpdateContext(); + //DevCon.Warning("New round of draws buffer %d vertex tail %d 
index tail %d TME %d TBP0 0x%x draw %d", m_current_buffer_idx, m_vertex->tail, m_index->tail, m_env.PRIM.TME, m_env.CTXT[m_env.PRIM.CTXT].TEX0.TBP0, s_n); +} + + +void GSState::ResetDrawBuffers() +{ + m_used_buffers_idx = 1; + + for (int i = 0; i < MAX_DRAW_BUFFERS; i++) + { + memset(&m_index_buffers[i], 0, sizeof(GSIndexBuff)); + memset(&m_vertex_buffers[i], 0, sizeof(GSVertexBuff)); + memset(&m_env_buffers[i], 0, sizeof(GSDrawBufferEnv)); + m_env_buffers[i].m_dirty_regs = 0xffff; + m_index = &m_index_buffers[i]; + m_vertex = &m_vertex_buffers[i]; + m_vertex_buffers[i].head = m_vertex_buffers[i].tail = m_vertex_buffers[i].next = 0; + GrowVertexBuffer(); + } + + ResetDrawBufferIdx(); +} + +// When use_flush_reason is set, the buffer that was current on entry is flushed with flush_reason; every other buffer is flushed as a normal context change. +void GSState::FlushBuffers(bool use_flush_reason, GSFlushReason flush_reason) +{ + const u32 current_idx = m_current_buffer_idx; + bool restore_env = false; + + if (m_used_buffers_idx > 0) + { + if (m_used_buffers_idx > 1) + { + restore_env = true; + memcpy(&m_temp_env, &m_env, sizeof(m_env)); + } + else if (m_index_buffers[0].tail == 0) + return; + + //DevCon.Warning("Flushing %d draw buffers from draw %d", m_used_buffers_idx, s_n); + for (u32 i = 0; i < m_used_buffers_idx; i++) + { + m_current_buffer_idx = i; + + m_index = &m_index_buffers[m_current_buffer_idx]; + m_vertex = &m_vertex_buffers[m_current_buffer_idx]; + m_backed_up_ctx = m_env_buffers[m_current_buffer_idx].m_backed_up_ctx; + m_dirty_gs_regs = m_env_buffers[m_current_buffer_idx].m_dirty_regs; + temp_draw_rect = m_env_buffers[m_current_buffer_idx].draw_rect; + + std::memcpy(&m_prev_env, &m_env_buffers[m_current_buffer_idx].m_env, 88); + std::memcpy(&m_prev_env.CTXT[0], &m_env_buffers[m_current_buffer_idx].m_env.CTXT[0], 96); + std::memcpy(&m_prev_env.CTXT[1], &m_env_buffers[m_current_buffer_idx].m_env.CTXT[1], 96); + + const int ctx = m_env_buffers[m_current_buffer_idx].m_backed_up_ctx; +
std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env_buffers[i].m_env.CTXT[ctx].offset, sizeof(m_env_buffers[i].m_env.CTXT[ctx].offset)); + std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env_buffers[i].m_env.CTXT[ctx].scissor, sizeof(m_env_buffers[i].m_env.CTXT[ctx].scissor)); + + if ((i + 1) < m_used_buffers_idx) + { + const int next_backed_ctx = m_env_buffers[m_current_buffer_idx + 1].m_backed_up_ctx; + + if (next_backed_ctx < 0) + continue; + + std::memcpy(&m_env, &m_env_buffers[m_current_buffer_idx + 1].m_env, 88); + std::memcpy(&m_env.CTXT[0], &m_env_buffers[m_current_buffer_idx + 1].m_env.CTXT[0], 96); + std::memcpy(&m_env.CTXT[1], &m_env_buffers[m_current_buffer_idx + 1].m_env.CTXT[1], 96); + } + else if (restore_env) + memcpy(&m_env, &m_temp_env, sizeof(m_env)); + + if (use_flush_reason && i == current_idx) + FlushDraw(flush_reason); + else + FlushDraw(GSFlushReason::CONTEXTCHANGE); + } + } + + // Restore the environment + m_current_buffer_idx = current_idx; + m_index = &m_index_buffers[m_current_buffer_idx]; + m_vertex = &m_vertex_buffers[m_current_buffer_idx]; + + const int ctx = m_env_buffers[m_current_buffer_idx].m_backed_up_ctx; + std::memcpy(&m_prev_env, &m_env_buffers[m_current_buffer_idx].m_env, 88); + std::memcpy(&m_prev_env.CTXT[0], &m_env_buffers[m_current_buffer_idx].m_env.CTXT[0], 96); + std::memcpy(&m_prev_env.CTXT[1], &m_env_buffers[m_current_buffer_idx].m_env.CTXT[1], 96); + std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env_buffers[m_current_buffer_idx].m_env.CTXT[ctx].offset, sizeof(m_env_buffers[m_current_buffer_idx].m_env.CTXT[ctx].offset)); + std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env_buffers[m_current_buffer_idx].m_env.CTXT[ctx].scissor, sizeof(m_env_buffers[m_current_buffer_idx].m_env.CTXT[ctx].scissor)); + + UpdateContext(); +} + +void GSState::PushBuffer() +{ + // Just in case it tries to overflow. + if (m_used_buffers_idx >= MAX_DRAW_BUFFERS) + { + //DevCon.Warning("Attempted to add a draw to buffer when full. 
Flushing"); + FlushBuffers(false); + ResetDrawBufferIdx(); + return; + } + + if (m_used_buffers_idx < MAX_DRAW_BUFFERS) + { + m_index = &m_index_buffers[m_used_buffers_idx]; + m_vertex = &m_vertex_buffers[m_used_buffers_idx]; + + const u32 base = m_vertex_buffers[m_current_buffer_idx].head; + const u32 copy_amt = m_vertex_buffers[m_current_buffer_idx].tail - base; + + m_vertex->tail = 0; + + if (copy_amt) + memcpy(m_vertex->buff, &m_vertex_buffers[m_current_buffer_idx].buff[base], sizeof(GSVertex) * copy_amt); + + m_vertex->head = 0; + m_vertex->next = 0; + m_vertex->tail += copy_amt; + + if (copy_amt) + { + for (u32 i = 0; i < copy_amt; i++) + { + GSVector4i* RESTRICT vert_ptr = (GSVector4i*)&m_vertex->buff[m_vertex->head + i]; + GSVector4i v = vert_ptr[1]; + v = v.xxxx().u16to32().sub32(m_xyof); + v = v.blend32<12>(v.sra32<4>()); + m_vertex->xy[i & 3] = v; + m_vertex->xy_tail = std::min(copy_amt, 2U); + + if (i == 0) + m_vertex->xyhead = v; + } + } + else + m_vertex->xy_tail = 0; + + m_current_buffer_idx = m_used_buffers_idx; + m_env_buffers[m_current_buffer_idx].draw_rect = m_dirty_gs_regs; + temp_draw_rect = m_env_buffers[m_current_buffer_idx].draw_rect; + m_dirty_gs_regs = 0; + m_used_buffers_idx++; + //DevCon.Warning("Pushing new buffer %d vertex tail %d index tail %d TME %d TBP0 0x%x draw %d", m_current_buffer_idx, m_vertex->tail, m_index->tail, m_env.PRIM.TME, m_env.CTXT[m_env.PRIM.CTXT].TEX0.TBP0, s_n); + } +} + +bool GSState::CanBufferNewDraw() +{ + GSDrawingContext& cur_context = m_env.CTXT[m_env.PRIM.CTXT]; + GSDrawingContext& base_context = m_env_buffers[0].m_env.CTXT[m_env_buffers[0].m_env.PRIM.CTXT]; + + // If the base draw isn't writing to the Z buffer, but following draws do, we can't use it. + // Also the base draw needs to be solid, not an alpha blend. 
+ if (base_context.ZBUF.ZMSK || cur_context.FRAME.FBP != base_context.FRAME.FBP || cur_context.ZBUF.ZBP != base_context.ZBUF.ZBP) + { + //DevCon.Warning("Flushing, cannot buffer draw due to incompatible base"); + return false; + } + + if (m_env.PRIM.PRIM != m_env_buffers[0].m_env.PRIM.PRIM || GSUtil::GetPrimClass(m_env.PRIM.PRIM) != GS_TRIANGLE_CLASS) + return false; + + // It's expected that the Z used will match what has currently been drawn, so GREATER or ALWAYS aren't really usable. + if (!cur_context.TEST.ZTE || cur_context.TEST.ZTST != ZTST_GEQUAL) + return false; + + // If the base ends up blending with Ad, we might have draws in the wrong order, as it could be relying on the new data. + // Let it reorient. (Jak water) + if (m_env_buffers[0].m_env.PRIM.ABE && base_context.ALPHA.C == 1) + return false; + + // Find any previous context which matches. + for (u32 i = 0; i < m_used_buffers_idx; i++) + { + const int ctx = m_env.PRIM.CTXT; + if (ctx != m_env_buffers[i].m_env.PRIM.CTXT) + continue; + + if (m_env.PRIM != m_env_buffers[i].m_env.PRIM) + continue; + + if (!std::memcmp(&m_env_buffers[i].m_env, &m_env, 88)) + { + GSDrawingEnvironment& buffered_ctx = m_env_buffers[i].m_env; + + if (buffered_ctx.CTXT[ctx].SCISSOR.U64 ^ cur_context.SCISSOR.U64) + continue; + + if (m_env.PRIM.FGE && buffered_ctx.FOGCOL != m_env.FOGCOL) + continue; + + if (m_env.PRIM.TME) + { + u64 mask = 0x1fffffffffull; // TBP0 TBW PSM TW TH TCC TFX + if ((cur_context.TEX0.PSM & 0x7) >= 3) + mask |= 0x1f78000000000000ull; // CPSM CSA + + if ((buffered_ctx.CTXT[ctx].TEX0.U64 ^ cur_context.TEX0.U64) & mask) + continue; + + const u64 clamp_mask = 0xFULL | (buffered_ctx.CTXT[ctx].CLAMP.WMS > 1 ? (0xFFFFFULL << 4) : 0) | (buffered_ctx.CTXT[ctx].CLAMP.WMT > 1 ?
(0xFFFFFULL << 24) : 0); + if ((buffered_ctx.CTXT[ctx].CLAMP.U64 ^ cur_context.CLAMP.U64) & clamp_mask) + continue; + if (GSLocalMemory::m_psm[cur_context.TEX0.PSM].trbpp != 32 && buffered_ctx.TEXA.U64 ^ m_env.TEXA.U64) + continue; + if (buffered_ctx.CTXT[ctx].TEX1.U32[0] ^ cur_context.TEX1.U32[0]) + continue; + if (cur_context.TEX1.MXL) + { + if (buffered_ctx.CTXT[ctx].TEX1.U32[1] ^ cur_context.TEX1.U32[1]) + continue; + if (buffered_ctx.CTXT[ctx].MIPTBP1.U64 ^ cur_context.MIPTBP1.U64) + continue; + if (cur_context.TEX1.MXL > 3 && buffered_ctx.CTXT[ctx].MIPTBP2.U64 ^ cur_context.MIPTBP2.U64) + continue; + } + } + if (buffered_ctx.CTXT[ctx].FRAME.U64 ^ cur_context.FRAME.U64) + continue; + if (buffered_ctx.CTXT[ctx].XYOFFSET.U64 ^ cur_context.XYOFFSET.U64) + continue; + if (buffered_ctx.CTXT[ctx].ZBUF.U64 ^ cur_context.ZBUF.U64) + continue; + if (buffered_ctx.PRIM.ABE && buffered_ctx.CTXT[ctx].ALPHA.U64 ^ cur_context.ALPHA.U64) + continue; + if (buffered_ctx.CTXT[ctx].FBA.FBA != cur_context.FBA.FBA) + continue; + if (buffered_ctx.CTXT[ctx].TEST.U32[0] ^ cur_context.TEST.U32[0]) + continue; + + if (buffered_ctx.DTHE.DTHE != m_env.DTHE.DTHE || (m_env.DTHE.DTHE && buffered_ctx.DIMX.U64 ^ m_env.DIMX.U64)) + continue; + if (buffered_ctx.COLCLAMP.CLAMP != m_env.COLCLAMP.CLAMP) + continue; + + if (i != m_current_buffer_idx) + { + // We found a matching draw + //DevCon.Warning("Matching buffered draw detected in index %d, using", i); + m_index = &m_index_buffers[i]; + m_vertex = &m_vertex_buffers[i]; + + const u32 base = /*std::min(*/m_vertex_buffers[m_current_buffer_idx].head/*, m_vertex_buffers[m_current_buffer_idx].next)*/; + const u32 copy_amt = m_vertex_buffers[m_current_buffer_idx].tail - base; + + m_recent_buffer_switch = true; + //i < m_current_buffer_idx; + m_vertex->tail = m_index->buff[m_index->tail - 1] + 1; + + if (copy_amt) + memcpy(&m_vertex->buff[m_vertex->tail], &m_vertex_buffers[m_current_buffer_idx].buff[base], sizeof(GSVertex) * copy_amt); + + 
m_vertex->head = m_vertex->tail + (m_vertex_buffers[m_current_buffer_idx].head - base); + m_vertex->next = m_vertex->head; + //m_vertex->tail + (m_vertex_buffers[m_current_buffer_idx].next - base); + m_vertex->tail += copy_amt; + + m_dirty_gs_regs = 0; + m_backed_up_ctx = m_env_buffers[i].m_backed_up_ctx; + temp_draw_rect = m_env_buffers[i].draw_rect; + std::memcpy(&m_prev_env, &m_env_buffers[i].m_env, 88); + std::memcpy(&m_prev_env.CTXT[0], &m_env_buffers[i].m_env.CTXT[0], 96); + std::memcpy(&m_prev_env.CTXT[1], &m_env_buffers[i].m_env.CTXT[1], 96); + std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env_buffers[i].m_env.CTXT[ctx].offset, sizeof(m_env_buffers[i].m_env.CTXT[ctx].offset)); + std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env_buffers[i].m_env.CTXT[ctx].scissor, sizeof(m_env_buffers[i].m_env.CTXT[ctx].scissor)); + + UpdateContext(); + + if (copy_amt) + { + for (u32 j = 0; j < copy_amt; j++) + { + GSVector4i* RESTRICT vert_ptr = (GSVector4i*)&m_vertex->buff[m_vertex->head + j]; + GSVector4i v = vert_ptr[1]; + v = v.xxxx().u16to32().sub32(m_xyof); + v = v.blend32<12>(v.sra32<4>()); + m_vertex->xy[j & 3] = v; + + if (j == 0) + m_vertex->xyhead = v; + + m_vertex->xy_tail = copy_amt; + } + } + else + m_vertex->xy_tail = 0; + + m_current_buffer_idx = i; + + } + + //DevCon.Warning("Picking buffer %d vertex tail %d index tail %d TME %d TBP0 0x%x dirty %x draw %d", m_current_buffer_idx, m_vertex->tail, m_index->tail, m_env.PRIM.TME, m_env.CTXT[m_env.PRIM.CTXT].TEX0.TBP0, m_dirty_gs_regs, s_n); + return true; + } + } + + // If we didn't find an existing one and we have no room, we need to flush. + if (m_used_buffers_idx >= MAX_DRAW_BUFFERS) + return false; + /*if (!cur_context.ZBUF.ZMSK || (cur_context.TEST.ATE && cur_context.TEST.AFAIL == AFAIL_ZB_ONLY)) + return false;*/ + /*if (!cur_context.ZBUF.ZMSK && (!m_env.PRIM.ABE || cur_context.ALPHA.IsOpaque() || !cur_context.ALPHA.IsCdInBlend())) + return false; + */ + PushBuffer(); + //DevCon.Warning("Buffering new draw! 
now buffering %d", m_used_buffers_idx); + + return true; +} + +void GSState::SetDrawBufferEnv() +{ + memcpy(&m_env_buffers[m_current_buffer_idx].m_env, &m_env, sizeof(GSDrawingEnvironment)); + m_env_buffers[m_current_buffer_idx].m_backed_up_ctx = m_backed_up_ctx; +} + +void GSState::SetDrawBuffDirty() +{ + m_env_buffers[m_current_buffer_idx].m_dirty_regs = m_dirty_gs_regs; + m_env_buffers[m_current_buffer_idx].draw_rect = temp_draw_rect; +} + +void GSState::ResetHandlers() + { std::fill(std::begin(m_fpGIFPackedRegHandlers), std::end(m_fpGIFPackedRegHandlers), &GSState::GIFPackedRegHandlerNull); m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerPRIM; @@ -599,11 +1018,11 @@ void GSState::DumpVertices(const std::string& filename) // Dump vertices file << "vertex: # " << GSUtil::GetPrimClassName(m_vt.m_primclass) << std::endl; - const u32 count = m_index.tail; - GSVertex* buffer = &m_vertex.buff[0]; + const u32 count = m_index->tail; + GSVertex* buffer = &m_vertex->buff[0]; for (u32 i = 0; i < count; ++i) { - GSVertex v = buffer[m_index.buff[i]]; + GSVertex v = buffer[m_index->buff[i]]; if ((n > 1) && (i > 0) && ((i % n) == 0)) file << std::endl; @@ -641,7 +1060,7 @@ void GSState::DumpVertices(const std::string& filename) file << std::endl; file << INDENT << LIST_ITEM << OPEN_MAP; - WriteSTQ_vert(buffer[m_index.buff[i]]); + WriteSTQ_vert(buffer[m_index->buff[i]]); file << CLOSE_MAP; WriteVertexIndex(i); @@ -880,10 +1299,17 @@ void GSState::DumpTransferImages() __inline void GSState::CheckFlushes() { - if (m_dirty_gs_regs && m_index.tail > 0) + if (m_dirty_gs_regs && m_index->tail > 0) { if (TestDrawChanged()) - Flush(GSFlushReason::CONTEXTCHANGE); + { + SetDrawBuffDirty(); + if (!CanBufferNewDraw()) + { + FlushBuffers(false); + ResetDrawBufferIdx(); + } + } } } @@ -1084,12 +1510,12 @@ __forceinline void GSState::ApplyPRIM(u32 prim) UpdateVertexKick(); - pxAssert(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == 
m_vertex.next); + pxAssert(m_index->tail == 0 || m_index->buff[m_index->tail - 1] + 1 == m_vertex->next); - if (m_index.tail == 0) - m_vertex.next = 0; + if (m_index->tail == 0) + m_vertex->next = 0; - m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer + m_vertex->head = m_vertex->tail = m_vertex->next; // remove unused vertices from the end of the vertex buffer } void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r) @@ -1192,11 +1618,13 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) // No need to flush on CLUT if we aren't texture mapping. if (wt) { - if ((m_prev_env.PRIM.TME && (m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || (m_mem.m_clut.IsInvalid() & 2)) - Flush(GSFlushReason::CLUTCHANGE); - else - FlushWrite(); - + for (u32 b = 0; b < m_used_buffers_idx; b++) + { + GSDrawingEnvironment& buffered_env = m_env_buffers[b].m_env; + if ((buffered_env.PRIM.TME && (buffered_env.CTXT[buffered_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || (m_mem.m_clut.IsInvalid() & 2)) + Flush(GSFlushReason::CLUTCHANGE); + } + FlushWrite(); // Abort any channel shuffle skipping, since this is likely part of a new shuffle. // Test case: Tomb Raider series. This is gated by the CBP actually changing, because // Urban Chaos writes to the memory backing the CLUT in the middle of a shuffle, and @@ -1821,9 +2249,19 @@ void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r) void GSState::Flush(GSFlushReason reason) { + SetDrawBuffDirty(); + FlushWrite(); - if (m_index.tail > 0) + FlushBuffers(true, reason); + ResetDrawBufferIdx(); +} + +void GSState::FlushDraw(GSFlushReason reason) +{ + FlushWrite(); + + if (m_index->tail > 0) { // Unless Vsync really needs the pending draw, don't do it when VSync happens as it can really screw up our heuristics when looking ahead. if (reason == VSYNC) @@ -1859,24 +2297,17 @@ void GSState::Flush(GSFlushReason reason) // Used to prompt the current draw that it's modifying its own CLUT. 
CheckCLUTValidity(m_prev_env.PRIM.PRIM); - if (m_dirty_gs_regs) - { - m_draw_env = &m_prev_env; - PRIM = &m_prev_env.PRIM; - UpdateContext(); + m_draw_env = &m_prev_env; + PRIM = &m_prev_env.PRIM; + UpdateContext(); - FlushPrim(); + FlushPrim(); - m_draw_env = &m_env; - PRIM = &m_env.PRIM; - UpdateContext(); + m_draw_env = &m_env; + PRIM = &m_env.PRIM; + UpdateContext(); - m_backed_up_ctx = -1; - } - else - { - FlushPrim(); - } + m_backed_up_ctx = -1; m_dirty_gs_regs = 0; temp_draw_rect = GSVector4i::zero(); @@ -1983,7 +2414,7 @@ u32 GSState::CalcMask(int exp, int max_exp) void GSState::FlushPrim() { - if (m_index.tail > 0) + if (m_index->tail > 0) { GL_REG("FlushPrim ctxt %d", PRIM->CTXT); @@ -2003,9 +2434,9 @@ void GSState::FlushPrim() GSVertex buff[2]; s_n++; - const u32 head = m_vertex.head; - const u32 tail = m_vertex.tail; - const u32 next = m_vertex.next; + const u32 head = m_vertex->head; + const u32 tail = m_vertex->tail; + const u32 next = m_vertex->next; u32 unused = 0; if (tail > head) @@ -2019,19 +2450,19 @@ void GSState::FlushPrim() case GS_LINESTRIP: case GS_SPRITE: unused = 1; - buff[0] = m_vertex.buff[tail - 1]; + buff[0] = m_vertex->buff[tail - 1]; break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: unused = std::min(tail - head, 2); - memcpy(buff, &m_vertex.buff[tail - unused], sizeof(GSVertex) * 2); + memcpy(buff, &m_vertex->buff[tail - unused], sizeof(GSVertex) * 2); break; case GS_TRIANGLEFAN: - buff[0] = m_vertex.buff[head]; + buff[0] = m_vertex->buff[head]; unused = 1; if (tail - 1 > head) { - buff[1] = m_vertex.buff[tail - 1]; + buff[1] = m_vertex->buff[tail - 1]; unused = 2; } break; @@ -2055,7 +2486,7 @@ void GSState::FlushPrim() #endif // Update scissor, it may have been modified by a previous draw m_env.CTXT[PRIM->CTXT].UpdateScissor(); - m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); + m_vt.Update(m_vertex->buff, m_index->buff, m_vertex->tail, m_index->tail, 
GSUtil::GetPrimClass(PRIM->PRIM)); // Texel coordinate rounding // Helps Manhunt (lights shining through objects). @@ -2067,13 +2498,13 @@ void GSState::FlushPrim() { const bool is_sprite = GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS; // ST's have the lowest 9 bits (or greater depending on exponent difference) rounding down (from hardware tests). - for (int i = m_index.tail - 1; i >= 0; i--) + for (int i = m_index->tail - 1; i >= 0; i--) { - GSVertex* v = &m_vertex.buff[m_index.buff[i]]; + GSVertex* v = &m_vertex->buff[m_index->buff[i]]; // Only Q on the second vertex is valid if (!(i & 1) && is_sprite) - v->RGBAQ.Q = m_vertex.buff[m_index.buff[i + 1]].RGBAQ.Q; + v->RGBAQ.Q = m_vertex->buff[m_index->buff[i + 1]].RGBAQ.Q; int T = std::bit_cast(v->ST.T); int Q = std::bit_cast(v->RGBAQ.Q); @@ -2126,7 +2557,7 @@ void GSState::FlushPrim() Draw(); g_perfmon.Put(GSPerfMon::Draw, 1); - g_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM)); + g_perfmon.Put(GSPerfMon::Prim, m_index->tail / GSUtil::GetVertexCount(PRIM->PRIM)); if (GSConfig.ShouldDump(s_n, g_perfmon.GetFrame())) { @@ -2138,15 +2569,15 @@ void GSState::FlushPrim() } } - m_index.tail = 0; - m_vertex.head = 0; + m_index->tail = 0; + m_vertex->head = 0; if (unused > 0) { - memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused); + memcpy(m_vertex->buff, buff, sizeof(GSVertex) * unused); - m_vertex.tail = unused; - m_vertex.next = next > head ? next - head : 0; + m_vertex->tail = unused; + m_vertex->next = next > head ? next - head : 0; // If it's a Triangle fan the XY buffer needs to be updated to point to the correct head vert // Jak 3 shadows get spikey (with autoflush) if you don't. 
@@ -2154,26 +2585,25 @@ void GSState::FlushPrim() { for (u32 i = 0; i < unused; i++) { - GSVector4i* RESTRICT vert_ptr = (GSVector4i*)&m_vertex.buff[i]; + GSVector4i* RESTRICT vert_ptr = (GSVector4i*)&m_vertex->buff[i]; GSVector4i v = vert_ptr[1]; v = v.xxxx().u16to32().sub32(m_xyof); v = v.blend32<12>(v.sra32<4>()); - m_vertex.xy[i & 3] = v; - m_vertex.xy_tail = unused; + m_vertex->xy[i & 3] = v; + m_vertex->xy_tail = unused; } } } else { - m_vertex.tail = 0; - m_vertex.next = 0; + m_vertex->tail = 0; + m_vertex->next = 0; } } } -GSVector4i GSState::GetTEX0Rect() +GSVector4i GSState::GetTEX0Rect(GSDrawingContext prev_ctx) { GSVector4i ret = GSVector4i::zero(); - const GSDrawingContext& prev_ctx = m_prev_env.CTXT[m_prev_env.PRIM.CTXT]; if (prev_ctx.CLAMP.WMS <= 1) // CLAMP/REPEAT { @@ -2216,121 +2646,152 @@ void GSState::CheckWriteOverlap(bool req_write, bool req_read) const int h = m_env.TRXREG.RRH; const GIFRegBITBLTBUF& blit = m_env.BITBLTBUF; - const GSDrawingContext& prev_ctx = m_prev_env.CTXT[m_prev_env.PRIM.CTXT]; const GSVector4i write_rect = GSVector4i(m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, m_env.TRXPOS.DSAX + w, m_env.TRXPOS.DSAY + h); const u32 write_start_bp = GSLocalMemory::GetStartBlockAddress(blit.DBP, blit.DBW, blit.DPSM, write_rect); const u32 write_end_bp = ((GSLocalMemory::GetEndBlockAddress(blit.DBP, blit.DBW, blit.DPSM, write_rect) + 1) + (GS_BLOCKS_PER_PAGE - 1)) & ~(GS_BLOCKS_PER_PAGE - 1); - GSVector4i tex_rect = m_prev_env.PRIM.TME ? GetTEX0Rect() : GSVector4i::zero(); - if (m_index.tail > 0) + for (u32 i = 0; i < m_used_buffers_idx; i++) { - // Only flush on a NEW transfer if a pending one is using the same address or overlap. - // Check Fast & Furious (Hardare mode) and Assault Suits Valken (either renderer) and Tomb Raider - Angel of Darkness menu (TBP != DBP but overlaps). - // Cartoon Network overwrites its own Z buffer in the middle of a draw. 
- // Alias wraps its transfers, so be careful - const GSVector4i read_rect = GSVector4i(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.TRXPOS.SSAX + w, m_env.TRXPOS.SSAY + h); + GSIndexBuff* cur_index_buff = &m_index_buffers[i]; + GSVertexBuff* cur_vertex_buff = &m_vertex_buffers[i]; + const GSDrawingContext& prev_ctx = m_env_buffers[i].m_env.CTXT[m_env_buffers[i].m_backed_up_ctx]; + const GSDrawingEnvironment& prev_env = m_env_buffers[i].m_env; + GSVector4i tex_rect = prev_env.PRIM.TME ? GetTEX0Rect(prev_ctx) : GSVector4i::zero(); - if (req_write && m_prev_env.PRIM.TME) + if (cur_index_buff->tail > 0) { - // Tex rect could be invalid showing 1024x1024 when it isn't. If the frame is only 1 page wide, it's either a big strip or a single page draw. - // This large texture causes misdetection of overlapping writes, causing our heuristics in the hardware renderer for future draws to be missing. - // Either way if we check the queued up coordinates, it should give us a fair idea. (Cabela's Trophy Bucks) - if (prev_ctx.FRAME.FBW == 1 && static_cast(tex_rect.width()) > (prev_ctx.TEX0.TBW * 64)) + // Only flush on a NEW transfer if a pending one is using the same address or overlap. + // Check Fast & Furious (Hardware mode) and Assault Suits Valken (either renderer) and Tomb Raider - Angel of Darkness menu (TBP != DBP but overlaps). + // Cartoon Network overwrites its own Z buffer in the middle of a draw. + // Alias wraps its transfers, so be careful + const GSVector4i read_rect = GSVector4i(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.TRXPOS.SSAX + w, m_env.TRXPOS.SSAY + h); + + if (req_write && prev_env.PRIM.TME) { - GSVector4i tex_draw_rect = GSVector4i::zero(); - for (u32 i = 0; i < m_index.tail; i++) + // Tex rect could be invalid showing 1024x1024 when it isn't. If the frame is only 1 page wide, it's either a big strip or a single page draw.
+ // This large texture causes misdetection of overlapping writes, causing our heuristics in the hardware renderer for future draws to be missing. + // Either way if we check the queued up coordinates, it should give us a fair idea. (Cabela's Trophy Bucks) + if (prev_ctx.FRAME.FBW == 1 && static_cast(tex_rect.width()) > (prev_ctx.TEX0.TBW * 64)) { - const GSVertex* v = &m_vertex.buff[m_index.buff[i]]; - GSVector2i tex_coord; - if (PRIM->FST) + GSVector4i tex_draw_rect = GSVector4i::zero(); + for (u32 i = 0; i < cur_index_buff->tail; i++) { - tex_coord.x = v->U >> 4; - tex_coord.y = v->V >> 4; - } - else - { - const float s = std::min((v->ST.S / v->RGBAQ.Q), 1.0f); - const float t = std::min((v->ST.T / v->RGBAQ.Q), 1.0f); + const GSVertex* v = &cur_vertex_buff->buff[cur_index_buff->buff[i]]; + GSVector2i tex_coord; + if (PRIM->FST) + { + tex_coord.x = v->U >> 4; + tex_coord.y = v->V >> 4; + } + else + { + const float s = std::min((v->ST.S / v->RGBAQ.Q), 1.0f); + const float t = std::min((v->ST.T / v->RGBAQ.Q), 1.0f); - tex_coord.x = static_cast(std::round((1 << m_context->TEX0.TW) * s)); - tex_coord.y = static_cast(std::round((1 << m_context->TEX0.TH) * t)); + tex_coord.x = static_cast(std::round((1 << prev_ctx.TEX0.TW) * s)); + tex_coord.y = static_cast(std::round((1 << prev_ctx.TEX0.TH) * t)); + } + + if (i == 0) + { + tex_draw_rect.x = tex_coord.x; + tex_draw_rect.y = tex_coord.y; + tex_draw_rect.z = tex_coord.x; + tex_draw_rect.w = tex_coord.y; + + continue; + } + + tex_draw_rect.x = std::min(tex_draw_rect.x, tex_coord.x); + tex_draw_rect.z = std::max(tex_draw_rect.z, tex_coord.x); + tex_draw_rect.y = std::min(tex_draw_rect.y, tex_coord.y); + tex_draw_rect.w = std::max(tex_draw_rect.w, tex_coord.y); } - if (i == 0) - { - tex_draw_rect.x = tex_coord.x; - tex_draw_rect.y = tex_coord.y; - tex_draw_rect.z = tex_coord.x; - tex_draw_rect.w = tex_coord.y; - - continue; - } - - tex_draw_rect.x = std::min(tex_draw_rect.x, tex_coord.x); - tex_draw_rect.z = 
std::max(tex_draw_rect.z, tex_coord.x); - tex_draw_rect.y = std::min(tex_draw_rect.y, tex_coord.y); - tex_draw_rect.w = std::max(tex_draw_rect.w, tex_coord.y); + tex_rect = tex_rect.rintersect(tex_draw_rect); } - tex_rect = tex_rect.rintersect(tex_draw_rect); - } - - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.TEX0.TBP0, prev_ctx.TEX0.TBW, prev_ctx.TEX0.PSM, tex_rect)) - { - - Flush(GSFlushReason::UPLOADDIRTYTEX); - } - if (prev_ctx.TEX1.MXL > 0 && prev_ctx.TEX1.MMIN >= 2 && prev_ctx.TEX1.MMIN <= 5) - { - switch (prev_ctx.TEX1.MXL) + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.TEX0.TBP0, prev_ctx.TEX0.TBW, prev_ctx.TEX0.PSM, tex_rect)) { - case 6: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP6, prev_ctx.MIPTBP2.TBW6, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 6, tex_rect.y >> 6, tex_rect.z >> 6, tex_rect.w >> 6))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - [[fallthrough]]; - case 5: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP5, prev_ctx.MIPTBP2.TBW5, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 5, tex_rect.y >> 5, tex_rect.z >> 5, tex_rect.w >> 5))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - [[fallthrough]]; - case 4: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP4, prev_ctx.MIPTBP2.TBW4, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 4, tex_rect.y >> 4, tex_rect.z >> 4, tex_rect.w >> 4))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - [[fallthrough]]; - case 3: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP3, prev_ctx.MIPTBP1.TBW3, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 3, tex_rect.y >> 3, tex_rect.z >> 3, tex_rect.w >> 3))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - [[fallthrough]]; - case 2: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP2, 
prev_ctx.MIPTBP1.TBW2, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 2, tex_rect.y >> 2, tex_rect.z >> 2, tex_rect.w >> 2))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - [[fallthrough]]; - case 1: - if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP1, prev_ctx.MIPTBP1.TBW1, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 1, tex_rect.y >> 1, tex_rect.z >> 1, tex_rect.w >> 1))) - Flush(GSFlushReason::UPLOADDIRTYTEX); - break; + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + if (prev_ctx.TEX1.MXL > 0 && prev_ctx.TEX1.MMIN >= 2 && prev_ctx.TEX1.MMIN <= 5) + { + switch (prev_ctx.TEX1.MXL) + { + case 6: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP6, prev_ctx.MIPTBP2.TBW6, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 6, tex_rect.y >> 6, tex_rect.z >> 6, tex_rect.w >> 6))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + [[fallthrough]]; + case 5: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP5, prev_ctx.MIPTBP2.TBW5, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 5, tex_rect.y >> 5, tex_rect.z >> 5, tex_rect.w >> 5))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + [[fallthrough]]; + case 4: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP2.TBP4, prev_ctx.MIPTBP2.TBW4, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 4, tex_rect.y >> 4, tex_rect.z >> 4, tex_rect.w >> 4))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + [[fallthrough]]; + case 3: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP3, prev_ctx.MIPTBP1.TBW3, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 3, tex_rect.y >> 3, tex_rect.z >> 3, tex_rect.w >> 3))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + [[fallthrough]]; + case 2: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP2, 
prev_ctx.MIPTBP1.TBW2, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 2, tex_rect.y >> 2, tex_rect.z >> 2, tex_rect.w >> 2))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + [[fallthrough]]; + case 1: + if (GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.MIPTBP1.TBP1, prev_ctx.MIPTBP1.TBW1, prev_ctx.TEX0.PSM, GSVector4i(tex_rect.x >> 1, tex_rect.y >> 1, tex_rect.z >> 1, tex_rect.w >> 1))) + { + Flush(GSFlushReason::UPLOADDIRTYTEX); + continue; + } + break; + } } } - } - const u32 frame_mask = GSLocalMemory::m_psm[prev_ctx.FRAME.PSM].fmsk; - const bool frame_required = (!(prev_ctx.TEST.ATE && prev_ctx.TEST.ATST == 0 && (prev_ctx.TEST.AFAIL == 2 || prev_ctx.TEST.AFAIL == 0)) && ((prev_ctx.FRAME.FBMSK & frame_mask) != frame_mask)) || prev_ctx.TEST.DATE; - if (frame_required) - { - const GSFlushReason reason = req_write ? (req_read ? GSFlushReason::LOCALTOLOCALMOVE : GSFlushReason::UPLOADDIRTYFRAME) : GSFlushReason::DOWNLOADFIFO; + const u32 frame_mask = GSLocalMemory::m_psm[prev_ctx.FRAME.PSM].fmsk; + const bool frame_required = (!(prev_ctx.TEST.ATE && prev_ctx.TEST.ATST == 0 && (prev_ctx.TEST.AFAIL == 2 || prev_ctx.TEST.AFAIL == 0)) && ((prev_ctx.FRAME.FBMSK & frame_mask) != frame_mask)) || prev_ctx.TEST.DATE; + if (frame_required) + { + const GSFlushReason reason = req_write ? (req_read ? 
GSFlushReason::LOCALTOLOCALMOVE : GSFlushReason::UPLOADDIRTYFRAME) : GSFlushReason::DOWNLOADFIFO; - if ((req_write && (blit.DBP == prev_ctx.FRAME.Block() || GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.FRAME.Block(), prev_ctx.FRAME.FBW, prev_ctx.FRAME.PSM, temp_draw_rect))) || - (req_read && (blit.DBP == prev_ctx.FRAME.Block() || GSLocalMemory::HasOverlap(blit.SBP, blit.SBW, blit.SPSM, read_rect, prev_ctx.FRAME.Block(), prev_ctx.FRAME.FBW, prev_ctx.FRAME.PSM, temp_draw_rect)))) - Flush(reason); - } + if ((req_write && (blit.DBP == prev_ctx.FRAME.Block() || GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.FRAME.Block(), prev_ctx.FRAME.FBW, prev_ctx.FRAME.PSM, m_env_buffers[i].draw_rect))) || + (req_read && (blit.DBP == prev_ctx.FRAME.Block() || GSLocalMemory::HasOverlap(blit.SBP, blit.SBW, blit.SPSM, read_rect, prev_ctx.FRAME.Block(), prev_ctx.FRAME.FBW, prev_ctx.FRAME.PSM, m_env_buffers[i].draw_rect)))) + { + Flush(reason); + continue; + } + } - const bool zbuf_required = (!(prev_ctx.TEST.ATE && prev_ctx.TEST.ATST == 0 && prev_ctx.TEST.AFAIL != 2) && !prev_ctx.ZBUF.ZMSK) || (prev_ctx.TEST.ZTE && prev_ctx.TEST.ZTST > ZTST_ALWAYS); - if (zbuf_required) - { - const GSFlushReason reason = req_write ? (req_read ? GSFlushReason::LOCALTOLOCALMOVE : GSFlushReason::UPLOADDIRTYZBUF) : GSFlushReason::DOWNLOADFIFO; + const bool zbuf_required = (!(prev_ctx.TEST.ATE && prev_ctx.TEST.ATST == 0 && prev_ctx.TEST.AFAIL != 2) && !prev_ctx.ZBUF.ZMSK) || (prev_ctx.TEST.ZTE && prev_ctx.TEST.ZTST > ZTST_ALWAYS); + if (zbuf_required) + { + const GSFlushReason reason = req_write ? (req_read ? 
GSFlushReason::LOCALTOLOCALMOVE : GSFlushReason::UPLOADDIRTYZBUF) : GSFlushReason::DOWNLOADFIFO; - if ((req_write && (blit.DBP == prev_ctx.ZBUF.Block() || GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.ZBUF.Block(), prev_ctx.FRAME.FBW, prev_ctx.ZBUF.PSM, temp_draw_rect))) || - (req_read && (blit.DBP == prev_ctx.ZBUF.Block() || GSLocalMemory::HasOverlap(blit.SBP, blit.SBW, blit.SPSM, read_rect, prev_ctx.ZBUF.Block(), prev_ctx.FRAME.FBW, prev_ctx.ZBUF.PSM, temp_draw_rect)))) - Flush(reason); + if ((req_write && (blit.DBP == prev_ctx.ZBUF.Block() || GSLocalMemory::HasOverlap(blit.DBP, blit.DBW, blit.DPSM, write_rect, prev_ctx.ZBUF.Block(), prev_ctx.FRAME.FBW, prev_ctx.ZBUF.PSM, m_env_buffers[i].draw_rect))) || + (req_read && (blit.DBP == prev_ctx.ZBUF.Block() || GSLocalMemory::HasOverlap(blit.SBP, blit.SBW, blit.SPSM, read_rect, prev_ctx.ZBUF.Block(), prev_ctx.FRAME.FBW, prev_ctx.ZBUF.PSM, m_env_buffers[i].draw_rect)))) + { + Flush(reason); + continue; + } + } } } @@ -3277,10 +3738,10 @@ void GSState::UpdateVertexKick() void GSState::GrowVertexBuffer() { - const u32 maxcount = std::max(m_vertex.maxcount * 3 / 2, 10000); - const u32 old_vertex_size = sizeof(GSVertex) * m_vertex.tail; + const u32 maxcount = std::max(m_vertex->maxcount * 3 / 2, 10000); + const u32 old_vertex_size = sizeof(GSVertex) * m_vertex->tail; const u32 new_vertex_size = sizeof(GSVertex) * maxcount; - const u32 old_index_size = sizeof(u16) * m_index.tail; + const u32 old_index_size = sizeof(u16) * m_index->tail; const u32 new_index_size = sizeof(u16) * maxcount * 6; // Worst case index list is a list of points with vs expansion, 6 indices per point // Structure describing buffers to reallocate @@ -3291,11 +3752,11 @@ void GSState::GrowVertexBuffer() u32 new_size; }; const std::array alloc_desc = {{ - {reinterpret_cast(&m_vertex.buff), old_vertex_size, new_vertex_size}, + {reinterpret_cast(&m_vertex->buff), old_vertex_size, new_vertex_size}, // discard contents of 
buff_copy by setting old_size = 0 - {reinterpret_cast(&m_vertex.buff_copy), 0, new_vertex_size}, + {reinterpret_cast(&m_vertex->buff_copy), 0, new_vertex_size}, {reinterpret_cast(&m_draw_vertex.buff), old_vertex_size, new_vertex_size}, - {reinterpret_cast(&m_index.buff), old_index_size, new_index_size}, + {reinterpret_cast(&m_index->buff), old_index_size, new_index_size}, {reinterpret_cast(&m_draw_index.buff), old_index_size, new_index_size} }}; @@ -3324,7 +3785,7 @@ void GSState::GrowVertexBuffer() *pbuff = new_buff; } - m_vertex.maxcount = maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it + m_vertex->maxcount = maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it } // For returning order of vertices to form a right triangle @@ -3624,7 +4085,7 @@ bool GSState::TrianglesAreQuadsImpl() quad_check_valid = true; are_quads = true; - if (m_index.tail % 6 != 0) + if (m_index->tail % 6 != 0) { are_quads = false; return false; @@ -3632,11 +4093,11 @@ bool GSState::TrianglesAreQuadsImpl() constexpr GSVector4i offset = shuffle_check ? GSVector4i::cxpr(8 << 4, 0, 8 << 4, 0) : GSVector4i::cxpr(0); - const GSVertex* RESTRICT v = m_vertex.buff; - const u16* RESTRICT index = m_index.buff; - const size_t count = m_index.tail; + const GSVertex* RESTRICT v = m_vertex->buff; + const u16* RESTRICT index = m_index->buff; + const size_t count = m_index->tail; - if (m_index.tail == 6) + if (m_index->tail == 6) { // Non-axis aligned check when only two triangles are_quads = AreTrianglesQuadNonAA(v, &index[0], &index[3]); @@ -3726,9 +4187,9 @@ GSState::PRIM_OVERLAP GSState::GetPrimitiveOverlapDrawlistImpl(bool save_drawlis pxAssert(!save_bbox || save_drawlist); // We should only save bboxes when saving drawlist. 
- const GSVertex* RESTRICT v = m_vertex.buff; - const u16* RESTRICT index = m_index.buff; - const u32 count = m_index.tail; + const GSVertex* RESTRICT v = m_vertex->buff; + const u16* RESTRICT index = m_index->buff; + const u32 count = m_index->tail; // Optimize out using indices for sprites and points; probably not much difference. const auto GetIndex = [&](int i) { @@ -3745,7 +4206,7 @@ GSState::PRIM_OVERLAP GSState::GetPrimitiveOverlapDrawlistImpl(bool save_drawlis if (primclass == GS_TRIANGLE_CLASS && m_quad_check_valid && m_are_quads) { // The triangles-are-quads check already ensures that there is no overlap. - m_drawlist.push_back(m_index.tail / n); + m_drawlist.push_back(m_index->tail / n); if (save_bbox) { const GSVector4i draw_area = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) * GSVector4(16.0f)) + xyof; @@ -3936,11 +4397,11 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap(bool save_drawlist) { // Assume that 1 triangle, 1 sprite, 1 line, or 3 points can't overlap. // Not fully accurate for points but low probability that they overlap. - if (m_vertex.next < 4) + if (m_vertex->next < 4) return PRIM_OVERLAP_NO; if (m_vt.m_primclass == GS_TRIANGLE_CLASS) - return (m_index.tail == 6 && TrianglesAreQuads()) ? PRIM_OVERLAP_NO : PRIM_OVERLAP_UNKNOW; + return (m_index->tail == 6 && TrianglesAreQuads()) ? PRIM_OVERLAP_NO : PRIM_OVERLAP_UNKNOW; else if (m_vt.m_primclass != GS_SPRITE_CLASS) return PRIM_OVERLAP_UNKNOW; // maybe, maybe not @@ -3951,7 +4412,7 @@ bool GSState::SpriteDrawWithoutGaps() { // Check that the height matches. Xenosaga 3 draws a letterbox around // the FMV with a sprite at the top and bottom of the framebuffer. 
- const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int first_dpY = v[1].XYZ.Y - v[0].XYZ.Y; const int first_dpX = v[1].XYZ.X - v[0].XYZ.X; @@ -3959,7 +4420,7 @@ bool GSState::SpriteDrawWithoutGaps() if (((first_dpX + 8) >> 4) == m_r_no_scissor.z) { // Borrowed from MergeSprite() modified to calculate heights. - for (u32 i = 2; i < m_vertex.next; i += 2) + for (u32 i = 2; i < m_vertex->next; i += 2) { const int last_pY = v[i - 1].XYZ.Y; const int dpY = v[i + 1].XYZ.Y - v[i].XYZ.Y; @@ -3976,7 +4437,7 @@ bool GSState::SpriteDrawWithoutGaps() { // Borrowed from MergeSprite(). const int offset_X = m_context->XYOFFSET.OFX; - for (u32 i = 2; i < m_vertex.next; i += 2) + for (u32 i = 2; i < m_vertex->next; i += 2) { const int last_pX = v[i - 1].XYZ.X; const int this_start_X = v[i].XYZ.X; @@ -3993,7 +4454,7 @@ bool GSState::SpriteDrawWithoutGaps() else { const int dpY = v[i + 1].XYZ.Y - v[i].XYZ.Y; - if ((std::abs(dpY - first_dpY) >= 16 && (i + 2) < m_vertex.next) || std::abs(this_start_X - last_pX) >= 16) + if ((std::abs(dpY - first_dpY) >= 16 && (i + 2) < m_vertex->next) || std::abs(this_start_X - last_pX) >= 16) return false; } } @@ -4006,7 +4467,7 @@ bool GSState::SpriteDrawWithoutGaps() { int lastXEdge = std::max(v[1].XYZ.X, v[0].XYZ.X); int lastYEdge = std::max(v[1].XYZ.Y, v[0].XYZ.Y); - for (u32 i = 2; i < m_vertex.next; i += 2) + for (u32 i = 2; i < m_vertex->next; i += 2) { const int dpY = v[i + 1].XYZ.Y - v[i].XYZ.Y; @@ -4054,12 +4515,12 @@ void GSState::CalculatePrimitiveCoversWithoutGaps() if (m_vt.m_primclass == GS_POINT_CLASS) { - m_primitive_covers_without_gaps = (m_vertex.next < 2) ? m_primitive_covers_without_gaps : GapsFound; + m_primitive_covers_without_gaps = (m_vertex->next < 2) ? 
m_primitive_covers_without_gaps : GapsFound; return; } else if (m_vt.m_primclass == GS_TRIANGLE_CLASS) { - m_primitive_covers_without_gaps = ((m_index.tail == 6 || ((m_index.tail % 6) == 0 && m_primitive_covers_without_gaps == FullCover)) && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound; + m_primitive_covers_without_gaps = ((m_index->tail == 6 || ((m_index->tail % 6) == 0 && m_primitive_covers_without_gaps == FullCover)) && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound; return; } else if (m_vt.m_primclass != GS_SPRITE_CLASS) @@ -4069,7 +4530,7 @@ void GSState::CalculatePrimitiveCoversWithoutGaps() } // Simple case: one sprite. - if (m_primitive_covers_without_gaps != GapsFound && m_index.tail == 2) + if (m_primitive_covers_without_gaps != GapsFound && m_index->tail == 2) return; m_primitive_covers_without_gaps = SpriteDrawWithoutGaps() ? (m_primitive_covers_without_gaps == GapsFound ? SpriteNoGaps : m_primitive_covers_without_gaps) : GapsFound; @@ -4079,11 +4540,11 @@ __forceinline bool GSState::EarlyDetectShuffle(u32 prim) { // We only handle sprites here and need one sprite in the queue. // Texture mapping must be enabled for a shuffle. - if (m_index.tail < 2 || prim != GS_SPRITE || !PRIM->TME) + if (m_index->tail < 2 || prim != GS_SPRITE || !PRIM->TME) return false; - const GSVertex* RESTRICT vertex = &m_vertex.buff[0]; - const u16* RESTRICT index = &m_index.buff[0]; + const GSVertex* RESTRICT vertex = &m_vertex->buff[0]; + const u16* RESTRICT index = &m_index->buff[0]; if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) { @@ -4225,7 +4686,7 @@ __forceinline bool GSState::EarlyDetectShuffle(u32 prim) // Heuristics to detect channel shuffle based on first sprite and clamp mode. 
const auto CheckWidthOrClampMode = [this]() -> bool { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4; @@ -4329,51 +4790,6 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer) return false; } -static constexpr u32 NumIndicesForPrim(u32 prim) -{ - switch (prim) - { - case GS_POINTLIST: - case GS_INVALID: - return 1; - case GS_LINELIST: - case GS_SPRITE: - case GS_LINESTRIP: - return 2; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - return 3; - default: - return 0; - } -} - -static constexpr u32 MaxVerticesForPrim(u32 prim) -{ - switch (prim) - { - // Four indices per 1 vertex. - case GS_POINTLIST: - case GS_INVALID: - - // Indices are shifted left by 2 to form quads. - case GS_LINELIST: - case GS_LINESTRIP: - return (std::numeric_limits::max() / 4) - 4; - - // Four indices per two vertices. 
- case GS_SPRITE: - return (std::numeric_limits::max() / 2) - 2; - - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - default: - return (std::numeric_limits::max() - 3); - } -} - __forceinline void GSState::CheckCLUTValidity(u32 prim) { if (m_mem.m_clut.IsInvalid() & 2) @@ -4381,22 +4797,26 @@ __forceinline void GSState::CheckCLUTValidity(u32 prim) u32 n = NumIndicesForPrim(prim); - const GSDrawingContext& ctx = m_prev_env.CTXT[m_prev_env.PRIM.CTXT]; - if ((m_index.tail > 0 || (m_vertex.tail == n - 1)) && (GSLocalMemory::m_psm[ctx.TEX0.PSM].pal == 0 || !m_prev_env.PRIM.TME)) + for (u32 i = 0; i < m_used_buffers_idx; i++) { - const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[ctx.FRAME.PSM]; - const bool frame_needed = !(ctx.TEST.ATE && ctx.TEST.ATST == 0 && ctx.TEST.AFAIL == 2) && ((ctx.FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk); - if (frame_needed && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp) + GSDrawingEnvironment& buffered_env = m_env_buffers[i].m_env; + const GSDrawingContext& ctx = buffered_env.CTXT[buffered_env.PRIM.CTXT]; + if ((m_index_buffers[i].tail > 0 || (m_vertex_buffers[i].tail == n - 1)) && (GSLocalMemory::m_psm[ctx.TEX0.PSM].pal == 0 || !buffered_env.PRIM.TME)) { - const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, ctx.FRAME.Block(), ctx.FRAME.FBW); + const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[ctx.FRAME.PSM]; + const bool frame_needed = !(ctx.TEST.ATE && ctx.TEST.ATST == 0 && ctx.TEST.AFAIL == 2) && ((ctx.FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk); + if (frame_needed && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp) + { + const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, ctx.FRAME.Block(), ctx.FRAME.FBW); - // If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check. - u32 endbp = startbp; - // otherwise calculate the end. 
- if (prim != GS_POINTLIST || (m_index.tail > 1)) - endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, ctx.FRAME.Block(), ctx.FRAME.FBW); + // If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check. + u32 endbp = startbp; + // otherwise calculate the end. + if (prim != GS_POINTLIST || (m_index_buffers[i].tail > 1)) + endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, ctx.FRAME.Block(), ctx.FRAME.FBW); - m_mem.m_clut.InvalidateRange(startbp, endbp, true); + m_mem.m_clut.InvalidateRange(startbp, endbp, true); + } } } } @@ -4405,7 +4825,7 @@ template __forceinline void GSState::HandleAutoFlush() { // Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe) - if ((m_index.tail & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag) + if ((m_index->tail & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag) return; // To briefly explain what's going on here, what we are checking for is draws over a texture when the source and destination are themselves. @@ -4416,8 +4836,8 @@ __forceinline void GSState::HandleAutoFlush() { int n = 1; u32 buff[3]; - const u32 head = m_vertex.head; - const u32 tail = m_vertex.tail; + const u32 head = m_vertex->head; + const u32 tail = m_vertex->tail; switch (prim) { @@ -4484,7 +4904,7 @@ __forceinline void GSState::HandleAutoFlush() // Get the rest of the rect. for (int i = 0; i < (n - 1); i++) { - const GSVertex* v = &m_vertex.buff[buff[i]]; + const GSVertex* v = &m_vertex->buff[buff[i]]; xy_coord.x = (static_cast(v->XYZ.X) - static_cast(m_context->XYOFFSET.OFX)) >> 4; xy_coord.y = (static_cast(v->XYZ.Y) - static_cast(m_context->XYOFFSET.OFY)) >> 4; @@ -4536,7 +4956,7 @@ __forceinline void GSState::HandleAutoFlush() return; // Get the last texture position from the last draw. 
- const GSVertex* v = &m_vertex.buff[m_index.buff[m_index.tail - 1]]; + const GSVertex* v = &m_vertex->buff[m_index->buff[m_index->tail - 1]]; if (PRIM->FST) { @@ -4626,13 +5046,13 @@ __forceinline void GSState::HandleAutoFlush() const GSVector2i offset = GSVector2i(m_context->XYOFFSET.OFX, m_context->XYOFFSET.OFY); const GSVector4i scissor = m_context->scissor.in; GSVector4i old_draw_rect = GSVector4i::zero(); - int current_draw_end = m_index.tail; + int current_draw_end = m_index->tail; while (current_draw_end >= n) { for (int i = current_draw_end - 1; i >= current_draw_end - n; i--) { - const GSVertex* v = &m_vertex.buff[m_index.buff[i]]; + const GSVertex* v = &m_vertex->buff[m_index->buff[i]]; if (prim == GS_SPRITE && (i & 1)) { @@ -4714,29 +5134,133 @@ __forceinline void GSState::HandleAutoFlush() } } +bool GSState::CheckOverlapVerts(u32 n) +{ + if (m_recent_buffer_switch && ((m_vertex->tail + 1) - m_vertex->head) == n) + { + m_recent_buffer_switch = false; + + /*if (!m_env.PRIM.ABE || m_env.CTXT[m_env.PRIM.CTXT].ALPHA.IsOpaque()) + return false;*/ + + if (m_used_buffers_idx > 1) + { + const GSVertex* v = &m_vertex->buff[0]; + + GSVector4i new_area = GSVector4i(m_v.XYZ.X - m_context->XYOFFSET.OFX, m_v.XYZ.Y - m_context->XYOFFSET.OFY).xyxy(); + for (u32 i = 0; i < (n - 1); i++) + { + const int pos = (m_vertex->tail - 1) - i; + + GSVector2i pre_vert; + if (m_env.PRIM.PRIM == GS_TRIANGLEFAN) + pre_vert = GSVector2i(m_vertex->xyhead.x - m_context->XYOFFSET.OFX, m_vertex->xyhead.y - m_context->XYOFFSET.OFY); + else + pre_vert = GSVector2i(v[pos].XYZ.X - m_context->XYOFFSET.OFX, v[pos].XYZ.Y - m_context->XYOFFSET.OFY); + + new_area.x = std::min(new_area.x, pre_vert.x); + new_area.z = std::max(new_area.z, pre_vert.x); + new_area.y = std::min(new_area.y, pre_vert.y); + new_area.w = std::max(new_area.w, pre_vert.y); + } + new_area = new_area.sra32<4>(); + new_area = new_area.rintersect(m_context->scissor.in); + //GSVector4i new_area_copy = new_area; + + /*if 
(!new_area.rintersect(temp_draw_rect).eq(new_area)) + return true;*/ + + /*if (!new_area.rintersect(temp_draw_rect).rempty()) + check = true;*/ + /*new_area_copy = new_area_copy.runion(temp_draw_rect); + else + new_area_copy = temp_draw_rect;*/ + + for (u32 b = m_current_buffer_idx; b < m_used_buffers_idx; b++) + { + /*if ((!m_env_buffers[b].m_env.PRIM.ABE || !m_env_buffers[b].m_env.CTXT[m_env_buffers[b].m_env.PRIM.CTXT].ALPHA.IsOpaque()) && !temp_draw_rect.eq(m_env_buffers[b].draw_rect)) + { + if (!((!m_env_buffers[b].m_env.PRIM.ABE || !m_env_buffers[b].m_env.CTXT[m_env_buffers[b].m_env.PRIM.CTXT].ALPHA.IsOpaque()) && !temp_draw_rect.eq(m_env_buffers[b].draw_rect) && !new_area.rintersect(m_env_buffers[b].draw_rect).rempty())) + DevCon.Warning("poo?"); + return true; + }*/ + if (new_area.rintersect(m_env_buffers[b].draw_rect).eq(new_area)/* && !temp_draw_rect.rintersect(m_env_buffers[b].draw_rect).rempty()*/) + return true; + } + + } + + /*const GSVertex* v = &m_vertex->buff[0]; + + GSVector4i new_area = GSVector4i(m_v.XYZ.X - m_context->XYOFFSET.OFX, m_v.XYZ.Y - m_context->XYOFFSET.OFY).xyxy(); + for (u32 i = 0; i < (n - 1); i++) + { + const int pos = m_index->buff[(m_index->tail - 1) - i]; + GSVector2i pre_vert = GSVector2i(v[pos].XYZ.X - m_context->XYOFFSET.OFX, v[pos].XYZ.Y - m_context->XYOFFSET.OFY); + new_area.x = std::min(new_area.x, pre_vert.x); + new_area.z = std::max(new_area.z, pre_vert.x); + new_area.y = std::min(new_area.y, pre_vert.y); + new_area.w = std::max(new_area.w, pre_vert.y); + } + new_area = new_area.sra32<4>(); + + if (new_area.rintersect(temp_draw_rect).eq(new_area)) + { + const int end_pos = m_index->tail - (n - 1); + //Need to check if it's already drawn at this vector with this setup, if it has, it means one of the other draws might be drawing over it, which is a bad time for us, so best check. 
+ for (int j = 0; j < end_pos; j+=n) + { + if (v[m_index->buff[j]].XYZ.X == m_v.XYZ.X && v[m_index->buff[j]].XYZ.Y == m_v.XYZ.Y) + { + int min_point = std::max(j - 2, 0); + int match = 0; + + for (int k = min_point; k < (min_point + 5); k++) + { + if (k == j) + continue; + + if (v[m_index->buff[k]].XYZ.X == v[m_vertex->tail - 2].XYZ.X && v[m_index->buff[k]].XYZ.Y == v[m_vertex->tail - 2].XYZ.Y) + match |= 1; + if (v[m_index->buff[k]].XYZ.X == v[m_vertex->tail - 1].XYZ.X && v[m_index->buff[k]].XYZ.Y == v[m_vertex->tail - 1].XYZ.Y) + match |= 2; + } + + if (match) + return true; + } + } + }*/ + } + return false; +} + template __forceinline void GSState::VertexKick(u32 skip) { constexpr u32 n = NumIndicesForPrim(prim); static_assert(n > 0); - pxAssert(m_vertex.tail < m_vertex.maxcount + 3); + pxAssert(m_vertex->tail < m_vertex->maxcount + 3); if constexpr (prim == GS_INVALID) { - m_vertex.tail = m_vertex.head; + m_vertex->tail = m_vertex->head; return; } - if (auto_flush && skip == 0 && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n) + if (CheckOverlapVerts(n)) + Flush(CONTEXTCHANGE); + + if (auto_flush && skip == 0 && m_index->tail > 0 && ((m_vertex->tail + 1) - m_vertex->head) >= n) { HandleAutoFlush(); } - u32 head = m_vertex.head; - u32 tail = m_vertex.tail; - u32 next = m_vertex.next; - u32 xy_tail = m_vertex.xy_tail; + u32 head = m_vertex->head; + u32 tail = m_vertex->tail; + u32 next = m_vertex->next; + u32 xy_tail = m_vertex->xy_tail; if (GSIsHardwareRenderer() && GSLocalMemory::m_psm[m_context->ZBUF.PSM].bpp == 32) { @@ -4751,7 +5275,7 @@ __forceinline void GSState::VertexKick(u32 skip) const GSVector4i new_v0(m_v.m[0]); const GSVector4i new_v1(m_v.m[1]); - GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail]; + GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex->buff[tail]; tailptr[0] = new_v0; tailptr[1] = new_v1; @@ -4763,20 +5287,31 @@ __forceinline void GSState::VertexKick(u32 skip) // applied, then we split it into the 
fixed/integer portions. const GSVector4i xy_ofs = new_v1.xxxx().u16to32().sub32(m_xyof); const GSVector4i xy = xy_ofs.blend32<12>(xy_ofs.sra32<4>()); - m_vertex.xy[xy_tail & 3] = xy; + m_vertex->xy[xy_tail & 3] = xy; // Backup head for triangle fans so we can read it later, otherwise it'll get lost after the 4th vertex. if (prim == GS_TRIANGLEFAN && tail == head) - m_vertex.xyhead = xy; + m_vertex->xyhead = xy; - m_vertex.tail = ++tail; - m_vertex.xy_tail = ++xy_tail; + m_vertex->tail = ++tail; + m_vertex->xy_tail = ++xy_tail; const u32 m = tail - head; if (m < n) return; + if (m_index->tail == 0/* && ((m_backed_up_ctx != m_env.PRIM.CTXT) || m_dirty_gs_regs)*/) + { + const int ctx = m_env.PRIM.CTXT; + std::memcpy(&m_prev_env, &m_env, 88); + std::memcpy(&m_prev_env.CTXT[ctx], &m_env.CTXT[ctx], 96); + std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset)); + std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env.CTXT[ctx].scissor, sizeof(m_env.CTXT[ctx].scissor)); + m_dirty_gs_regs = 0; + m_backed_up_ctx = m_env.PRIM.CTXT; + SetDrawBufferEnv(); + } // Skip draws when scissor is out of range (i.e. bottom-right is less than top-left), since everything will get clipped. skip |= static_cast(m_scissor_invalid); @@ -4784,9 +5319,9 @@ __forceinline void GSState::VertexKick(u32 skip) GSVector4i pmin, pmax; if (skip == 0) { - const GSVector4i v0 = m_vertex.xy[(xy_tail - 1) & 3]; - const GSVector4i v1 = m_vertex.xy[(xy_tail - 2) & 3]; - const GSVector4i v2 = (prim == GS_TRIANGLEFAN) ? m_vertex.xyhead : m_vertex.xy[(xy_tail - 3) & 3]; + const GSVector4i v0 = m_vertex->xy[(xy_tail - 1) & 3]; + const GSVector4i v1 = m_vertex->xy[(xy_tail - 2) & 3]; + const GSVector4i v2 = (prim == GS_TRIANGLEFAN) ? 
m_vertex->xyhead : m_vertex->xy[(xy_tail - 3) & 3]; switch (prim) { @@ -4858,14 +5393,14 @@ __forceinline void GSState::VertexKick(u32 skip) case GS_LINELIST: case GS_TRIANGLELIST: case GS_SPRITE: - m_vertex.tail = head; // no need to check or grow the buffer length + m_vertex->tail = head; // no need to check or grow the buffer length break; case GS_LINESTRIP: case GS_TRIANGLESTRIP: - m_vertex.head = head + 1; + m_vertex->head = head + 1; [[fallthrough]]; case GS_TRIANGLEFAN: - if (tail >= m_vertex.maxcount) + if (tail >= m_vertex->maxcount) GrowVertexBuffer(); // in case too many vertices were skipped break; default: @@ -4875,82 +5410,71 @@ __forceinline void GSState::VertexKick(u32 skip) return; } - if (tail >= m_vertex.maxcount) + if (tail >= m_vertex->maxcount) GrowVertexBuffer(); - if (m_index.tail == 0 && ((m_backed_up_ctx != m_env.PRIM.CTXT) || m_dirty_gs_regs)) - { - const int ctx = m_env.PRIM.CTXT; - std::memcpy(&m_prev_env, &m_env, 88); - std::memcpy(&m_prev_env.CTXT[ctx], &m_env.CTXT[ctx], 96); - std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset)); - std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env.CTXT[ctx].scissor, sizeof(m_env.CTXT[ctx].scissor)); - m_dirty_gs_regs = 0; - m_backed_up_ctx = m_env.PRIM.CTXT; - } - - u16* RESTRICT buff = &m_index.buff[m_index.tail]; + u16* RESTRICT buff = &m_index->buff[m_index->tail]; switch (prim) { case GS_POINTLIST: buff[0] = static_cast(head + 0); - m_vertex.head = head + 1; - m_vertex.next = head + 1; - m_index.tail += 1; + m_vertex->head = head + 1; + m_vertex->next = head + 1; + m_index->tail += 1; break; case GS_LINELIST: buff[0] = static_cast(head + 0); buff[1] = static_cast(head + 1); - m_vertex.head = head + 2; - m_vertex.next = head + 2; - m_index.tail += 2; + m_vertex->head = head + 2; + m_vertex->next = head + 2; + m_index->tail += 2; break; case GS_LINESTRIP: if (next < head) { - m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; - m_vertex.buff[next + 1] = 
m_vertex.buff[head + 1]; + m_vertex->buff[next + 0] = m_vertex->buff[head + 0]; + m_vertex->buff[next + 1] = m_vertex->buff[head + 1]; head = next; - m_vertex.tail = next + 2; + m_vertex->tail = next + 2; } buff[0] = static_cast(head + 0); buff[1] = static_cast(head + 1); - m_vertex.head = head + 1; - m_vertex.next = head + 2; - m_index.tail += 2; + m_vertex->head = head + 1; + m_vertex->next = head + 2; + m_index->tail += 2; break; case GS_TRIANGLELIST: buff[0] = static_cast(head + 0); buff[1] = static_cast(head + 1); buff[2] = static_cast(head + 2); - m_vertex.head = head + 3; - m_vertex.next = head + 3; - m_index.tail += 3; + m_vertex->head = head + 3; + m_vertex->next = head + 3; + m_index->tail += 3; break; case GS_TRIANGLESTRIP: if (next < head) { - m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; - m_vertex.buff[next + 1] = m_vertex.buff[head + 1]; - m_vertex.buff[next + 2] = m_vertex.buff[head + 2]; + m_vertex->buff[next + 0] = m_vertex->buff[head + 0]; + m_vertex->buff[next + 1] = m_vertex->buff[head + 1]; + m_vertex->buff[next + 2] = m_vertex->buff[head + 2]; head = next; - m_vertex.tail = next + 3; + m_vertex->tail = next + 3; } buff[0] = static_cast(head + 0); buff[1] = static_cast(head + 1); buff[2] = static_cast(head + 2); - m_vertex.head = head + 1; - m_vertex.next = head + 3; - m_index.tail += 3; + m_vertex->head = head + 1; + m_vertex->next = head + 3; + m_index->tail += 3; break; case GS_TRIANGLEFAN: // TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare) buff[0] = static_cast(head + 0); buff[1] = static_cast(tail - 2); buff[2] = static_cast(tail - 1); - m_vertex.next = tail; - m_index.tail += 3; + m_vertex->next = tail; + m_index->tail += 3; break; case GS_SPRITE: buff[0] = static_cast(head + 0); @@ -4958,11 +5482,11 @@ __forceinline void GSState::VertexKick(u32 skip) // Update the first vert's Q for ease of doing Autoflush if (!m_env.PRIM.FST) - m_vertex.buff[buff[0]].RGBAQ.Q = 
m_vertex.buff[buff[1]].RGBAQ.Q; + m_vertex->buff[buff[0]].RGBAQ.Q = m_vertex->buff[buff[1]].RGBAQ.Q; - m_vertex.head = head + 2; - m_vertex.next = head + 2; - m_index.tail += 2; + m_vertex->head = head + 2; + m_vertex->next = head + 2; + m_index->tail += 2; break; default: ASSUME(0); @@ -4971,14 +5495,14 @@ __forceinline void GSState::VertexKick(u32 skip) // Update rectangle for the current draw. We can use the re-integer coordinates from min/max here. const GSVector4i draw_min = pmin.zwzw(); const GSVector4i draw_max = pmax; - if (m_vertex.tail != n) + if (m_vertex->tail != n) temp_draw_rect = temp_draw_rect.min_i32(draw_min).blend32<12>(temp_draw_rect.max_i32(draw_max)); else temp_draw_rect = draw_min.blend32<12>(draw_max); temp_draw_rect = temp_draw_rect.rintersect(m_context->scissor.in); constexpr u32 max_vertices = MaxVerticesForPrim(prim); - if (max_vertices != 0 && m_vertex.tail >= max_vertices) + if (max_vertices != 0 && m_vertex->tail >= max_vertices) Flush(VERTEXCOUNT); } @@ -5141,9 +5665,9 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL if (!int_rc.eq(scissored_rc)) { - const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]]; - const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]]; - const GSVertex* vert_third = &m_vertex.buff[m_index.buff[2]]; + const GSVertex* vert_first = &m_vertex->buff[m_index->buff[0]]; + const GSVertex* vert_second = &m_vertex->buff[m_index->buff[1]]; + const GSVertex* vert_third = &m_vertex->buff[m_index->buff[2]]; GSVector4 new_st = st; bool u_forward_check = PRIM->FST ? 
(vert_first->U < vert_second->U) : ((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_second->ST.S / vert_first->RGBAQ.Q)); @@ -6294,3 +6818,4 @@ void GSState::GSPCRTCRegs::CalculateDisplayOffset(bool scanmask) } } } + diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index f26db3bfa3..664e663713 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -127,37 +127,41 @@ private: protected: static constexpr int INVALID_ALPHA_MINMAX = 500; + static constexpr int MAX_DRAW_BUFFERS = 3; GSVertex m_v = {}; float m_q = 1.0f; GSVector4i m_scissor_cull_min = {}; GSVector4i m_scissor_cull_max = {}; GSVector4i m_xyof = {}; + u32 m_used_buffers_idx = 0; + u32 m_current_buffer_idx = 0; + bool m_recent_buffer_switch = false; - struct + struct GSVertexBuff { GSVertex* buff; - GSVertex* buff_copy; // same size buffer to copy/modify the original buffer + GSVertex* buff_copy; // same size buffer to copy/modify the original buffer u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 u32 xy_tail; GSVector4i xy[4]; GSVector4i xyhead; - } m_vertex = {}; + }; - struct + GSVertexBuff m_vertex_buffers[MAX_DRAW_BUFFERS]; + GSVertexBuff* m_vertex; + + struct GSIndexBuff { u16* buff; u32 tail; - } m_index = {}; + }; - struct - { - GSVertex* buff; - u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 - u32 xy_tail; - GSVector4i xy[4]; - GSVector4i xyhead; - } m_draw_vertex = {}; + GSIndexBuff m_index_buffers[MAX_DRAW_BUFFERS]; + + GSIndexBuff* m_index; + + GSVertexBuff m_draw_vertex = {}; struct { @@ -165,6 +169,16 @@ protected: u32 tail; } m_draw_index = {}; + struct GSDrawBufferEnv + { + GSDrawingEnvironment m_env; + int m_backed_up_ctx = 0; + u32 m_dirty_regs = 0; + GSVector4i draw_rect = GSVector4i::zero(); + }; + + GSDrawBufferEnv m_env_buffers[MAX_DRAW_BUFFERS] = {}; + void UpdateContext(); void UpdateScissor(); @@ -175,6 +189,7 @@ protected: template void HandleAutoFlush(); bool 
EarlyDetectShuffle(u32 prim); void CheckCLUTValidity(u32 prim); + bool CheckOverlapVerts(u32 n); template void VertexKick(u32 skip); @@ -241,9 +256,10 @@ public: GSLocalMemory m_mem; GSDrawingEnvironment m_env = {}; GSDrawingEnvironment m_prev_env = {}; + GSDrawingEnvironment m_temp_env = {}; const GSDrawingEnvironment* m_draw_env = &m_env; GSDrawingContext* m_context = nullptr; - GSVector4i temp_draw_rect = {}; + GSVector4i temp_draw_rect; std::unique_ptr m_dump; bool m_scissor_invalid = false; bool m_quad_check_valid = false; @@ -441,7 +457,15 @@ public: virtual void Reset(bool hardware_reset); virtual void UpdateSettings(const Pcsx2Config::GSOptions& old_config); + void ResetDrawBuffers(); + void ResetDrawBufferIdx(); + void FlushBuffers(bool use_flush_reason = false, GSFlushReason flush_reason = GSFlushReason::CONTEXTCHANGE); + void PushBuffer(); + void SetDrawBufferEnv(); + void SetDrawBuffDirty(); + bool CanBufferNewDraw(); void Flush(GSFlushReason reason); + void FlushDraw(GSFlushReason reason); u32 CalcMask(int exp, int max_exp); void FlushPrim(); bool TestDrawChanged(); @@ -454,7 +478,7 @@ public: virtual void Move(); - GSVector4i GetTEX0Rect(); + GSVector4i GetTEX0Rect(GSDrawingContext prev_ctx); void CheckWriteOverlap(bool req_write, bool req_read); void Write(const u8* mem, int len); void Read(u8* mem, int len); diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 1bfc4632cf..dc844bd5cf 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -115,7 +115,7 @@ bool GSHwHack::GSC_IRem(GSRendererHW& r, int& skip) // Detect the deswizzling shuffle from depth, copying the RG and BA separately on each half of the page (ignore the split). 
if (RTME && RFBP != RTBP0 && RFPSM == PSMCT16S && RTPSM == PSMCT16S) { - if (r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 64 && r.m_index.tail == 128) + if (r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 64 && r.m_index->tail == 128) { const GSVector4i draw_size(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y/2, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y/2); const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y/2, r.m_vt.m_max.t.x, r.m_vt.m_max.t.y/2); @@ -126,7 +126,7 @@ bool GSHwHack::GSC_IRem(GSRendererHW& r, int& skip) } // Following the previous draw, it tries to copy everything read from depth and offset it by 2, for the alternate line channel shuffle (skipped above). - if (RTBP0 == (RFBP - 0x20) && r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 34 && r.m_index.tail == 2) + if (RTBP0 == (RFBP - 0x20) && r.m_vt.m_max.p.x == 64 && r.m_vt.m_max.p.y == 34 && r.m_index->tail == 2) { GSVector4i draw_size(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y - 2.0f, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y - 2.0f); GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x, r.m_vt.m_max.t.y); @@ -264,10 +264,10 @@ bool GSHwHack::GSC_SFEX3(GSRendererHW& r, int& skip) // Skipping is no good as the copy is used again later, and it causes a weird shimmer/echo effect every other frame. // Add on the height from the second part of the draw to the first, to make it one big rect. 
- r.m_vertex.buff[1].XYZ.Y += r.m_vertex.buff[r.m_vertex.tail - 1].XYZ.Y - r.m_context->XYOFFSET.OFY; - r.m_vertex.buff[1].V = r.m_vertex.buff[r.m_vertex.tail - 1].V; - r.m_vertex.tail = 2; - r.m_index.tail = 2; + r.m_vertex->buff[1].XYZ.Y += r.m_vertex->buff[r.m_vertex->tail - 1].XYZ.Y - r.m_context->XYOFFSET.OFY; + r.m_vertex->buff[1].V = r.m_vertex->buff[r.m_vertex->tail - 1].V; + r.m_vertex->tail = 2; + r.m_index->tail = 2; } } @@ -332,9 +332,9 @@ bool GSHwHack::GSC_NamcoGames(GSRendererHW& r, int& skip) { if (skip == 0) { - if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) + if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index->tail > 2) { - GSVertex* v = &r.m_vertex.buff[0]; + GSVertex* v = &r.m_vertex->buff[0]; // Don't enable hack on native res. // Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling. // Game copies the framebuffer as individual page rects with slight offsets (like 1/16 of a pixel etc) which doesn't wokr well with upscaling. @@ -348,7 +348,7 @@ bool GSHwHack::GSC_NamcoGames(GSRendererHW& r, int& skip) else { // Fixes the alignment of the two halves for the heat haze on the temple stage. 
- for (u32 i = 0; i < r.m_index.tail; i+=2) + for (u32 i = 0; i < r.m_index->tail; i+=2) { v[i].XYZ.Y -= 0x8; } @@ -676,7 +676,7 @@ bool GSHwHack::GSC_NFSUndercover(GSRendererHW& r, int& skip) if (RPRIM->TME && Frame.PSM == PSMCT16S && Frame.FBMSK != 0 && Frame.FBW == 10 && Texture.TBW == 1 && Texture.TBP0 == 0x02800 && Texture.PSM == PSMZ16S) { - GSVertex* v = &r.m_vertex.buff[1]; + GSVertex* v = &r.m_vertex->buff[1]; v[0].XYZ.X = static_cast(RCONTEXT->XYOFFSET.OFX + ((r.m_r.z * 2) << 4)); v[0].XYZ.Y = static_cast(RCONTEXT->XYOFFSET.OFY + (r.m_r.w << 4)); v[0].U = r.m_r.z << 4; @@ -687,8 +687,8 @@ bool GSHwHack::GSC_NFSUndercover(GSRendererHW& r, int& skip) r.m_vt.m_max.p.y = r.m_r.w; r.m_vt.m_max.t.x = r.m_r.z; r.m_vt.m_max.t.y = r.m_r.w; - r.m_vertex.head = r.m_vertex.tail = r.m_vertex.next = 2; - r.m_index.tail = 2; + r.m_vertex->head = r.m_vertex->tail = r.m_vertex->next = 2; + r.m_index->tail = 2; skip = 79; } else @@ -840,7 +840,7 @@ bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, int& skip) if (dst) { - float dc = r.m_vertex.buff[1].XYZ.Z; + float dc = r.m_vertex->buff[1].XYZ.Z; g_gs_device->ClearDepth(dst->m_texture, dc * std::exp2(-32.0f)); } } @@ -858,9 +858,9 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip) if (RPRIM->TME && RTEX0.TW == 3 && RTEX0.TH == 3 && RTEX0.PSM == 0 && RFRAME.FBMSK == 0x00FFFFFF && RFRAME.FBW == 8 && r.PCRTCDisplays.GetResolution().x > 512) { // Check we are drawing stripes - for (u32 i = 1; i < r.m_vertex.tail; i+=2) + for (u32 i = 1; i < r.m_vertex->tail; i+=2) { - int value = (((r.m_vertex.buff[i].XYZ.X - r.m_vertex.buff[i - 1].XYZ.X) + 8) >> 4); + int value = (((r.m_vertex->buff[i].XYZ.X - r.m_vertex->buff[i - 1].XYZ.X) + 8) >> 4); if (value != 32) return false; } @@ -872,18 +872,18 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip) for (int vert = 32; vert < 40; vert+=2) { - r.m_vertex.buff[vert].XYZ.X = context->XYOFFSET.OFX + (((vert * 16) << 4) - 8); - r.m_vertex.buff[vert].XYZ.Y = 
context->XYOFFSET.OFY; - r.m_vertex.buff[vert].U = (vert * 16) << 4; - r.m_vertex.buff[vert].V = 0; - r.m_vertex.buff[vert+1].XYZ.X = context->XYOFFSET.OFX + ((((vert * 16) + 32) << 4) - 8); - r.m_vertex.buff[vert+1].XYZ.Y = context->XYOFFSET.OFY + (r.PCRTCDisplays.GetResolution().y << 4) + 8; - r.m_vertex.buff[vert+1].U = ((vert * 16) + 32) << 4; - r.m_vertex.buff[vert+1].V = r.PCRTCDisplays.GetResolution().y << 4; + r.m_vertex->buff[vert].XYZ.X = context->XYOFFSET.OFX + (((vert * 16) << 4) - 8); + r.m_vertex->buff[vert].XYZ.Y = context->XYOFFSET.OFY; + r.m_vertex->buff[vert].U = (vert * 16) << 4; + r.m_vertex->buff[vert].V = 0; + r.m_vertex->buff[vert+1].XYZ.X = context->XYOFFSET.OFX + ((((vert * 16) + 32) << 4) - 8); + r.m_vertex->buff[vert+1].XYZ.Y = context->XYOFFSET.OFY + (r.PCRTCDisplays.GetResolution().y << 4) + 8; + r.m_vertex->buff[vert+1].U = ((vert * 16) + 32) << 4; + r.m_vertex->buff[vert+1].V = r.PCRTCDisplays.GetResolution().y << 4; } - /*r.m_vertex.head = r.m_vertex.tail = r.m_vertex.next = 2; - r.m_index.tail = 2;*/ + /*r.m_vertex->head = r.m_vertex->tail = r.m_vertex->next = 2; + r.m_index->tail = 2;*/ r.m_vt.m_max.p.x = r.m_r.z; r.m_vt.m_max.p.y = r.m_r.w; @@ -917,7 +917,7 @@ bool GSHwHack::GSC_BlueTongueGames(GSRendererHW& r, int& skip) // This is the giant dither-like depth buffer. We need this on the CPU *and* the GPU for textures which are // rendered on both. 
- if (context->FRAME.FBW == 8 && r.m_index.tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1) + if (context->FRAME.FBW == 8 && r.m_index->tail == 32 && r.PRIM->TME && context->TEX0.TBW == 1) { r.SwPrimRender(r, false, false); return false; @@ -957,8 +957,8 @@ bool GSHwHack::GSC_MetalGearSolid3(GSRendererHW& r, int& skip) GL_INS("OI_MetalGearSolid3(): %x -> %x, %dx%d, subtract %d", RFBP, RFBP + (RFBW / 2), r.m_r.width(), r.m_r.height(), w_sub); - for (u32 i = 0; i < r.m_vertex.next; i++) - r.m_vertex.buff[i].XYZ.X -= w_sub_fp; + for (u32 i = 0; i < r.m_vertex->next; i++) + r.m_vertex->buff[i].XYZ.X -= w_sub_fp; // No point adjusting the scissor, it just ends up expanding out anyway.. but we do have to fix up the draw rect. r.m_r -= GSVector4i(w_sub); @@ -971,7 +971,7 @@ bool GSHwHack::GSC_Turok(GSRendererHW& r, int& skip) // Since we can't look in to the future to check this, the options are either rearrange all the pages in a target when the width changes // (very slow, could break a ton of stuff which stores different things in the alpha channel), or this. I choose this. 
- if (r.m_index.tail == 6 && RPRIM->PRIM == 4 && !RTME && RFBMSK == 0x00FFFFFF && floor(r.m_vt.m_max.p.x) == 512 && r.m_env.CTXT[r.m_backed_up_ctx].FRAME.FBW == 10 && RFRAME.FBW == 8 && RFPSM == PSMCT32 && RTEST.ATE && RTEST.ATST == ATST_GEQUAL) + if (r.m_index->tail == 6 && RPRIM->PRIM == 4 && !RTME && RFBMSK == 0x00FFFFFF && floor(r.m_vt.m_max.p.x) == 512 && r.m_env.CTXT[r.m_backed_up_ctx].FRAME.FBW == 10 && RFRAME.FBW == 8 && RFPSM == PSMCT32 && RTEST.ATE && RTEST.ATST == ATST_GEQUAL) { int num_pages = r.m_cached_ctx.FRAME.FBW * ((floor(r.m_vt.m_max.p.y) + 31) / 32); r.m_cached_ctx.FRAME.FBW = 10; @@ -988,7 +988,7 @@ bool GSHwHack::GSC_Turok(GSRendererHW& r, int& skip) bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { - const u32 n_vertices = r.m_vertex.next; + const u32 n_vertices = r.m_vertex->next; const int w = r.m_r.width(); const int h = r.m_r.height(); const bool is_copy = !r.PRIM->ABE || ( @@ -1021,7 +1021,7 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds const u32 FBP = r.m_cached_ctx.FRAME.Block(); const u32 FBW = r.m_cached_ctx.FRAME.FBW; GL_INS("PointListPalette - m_r = <%d, %d => %d, %d>, n_vertices = %u, FBP = 0x%x, FBW = %u", r.m_r.x, r.m_r.y, r.m_r.z, r.m_r.w, n_vertices, FBP, FBW); - const GSVertex* RESTRICT v = r.m_vertex.buff; + const GSVertex* RESTRICT v = r.m_vertex->buff; const int ox(r.m_context->XYOFFSET.OFX); const int oy(r.m_context->XYOFFSET.OFY); for (size_t i = 0; i < n_vertices; ++i) @@ -1219,9 +1219,9 @@ bool GSHwHack::OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GST buffer to adapt the page width properly. 
*/ - const GSVertex* v = &r.m_vertex.buff[0]; + const GSVertex* v = &r.m_vertex->buff[0]; - if (ds && r.m_vertex.next == 2 && !RPRIM->TME && RFRAME.FBW == 10 && v->XYZ.Z == 0 && RTEST.ZTST == ZTST_ALWAYS) + if (ds && r.m_vertex->next == 2 && !RPRIM->TME && RFRAME.FBW == 10 && v->XYZ.Z == 0 && RTEST.ZTST == ZTST_ALWAYS) { GL_INS("OI_ArTonelico2"); g_gs_device->ClearDepth(ds, 0.0f); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 5d85baecc5..5efd836c58 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -218,7 +218,7 @@ void GSRendererHW::Lines2Sprites() // each sprite converted to quad needs twice the space - while (m_vertex.tail * 2 > m_vertex.maxcount) + while (m_vertex->tail * 2 > m_vertex->maxcount) { GrowVertexBuffer(); } @@ -226,14 +226,14 @@ void GSRendererHW::Lines2Sprites() // assume vertices are tightly packed and sequentially indexed (it should be the case) const bool predivide_q = PRIM->TME && !PRIM->FST && m_vt.m_accurate_stq; - if (m_vertex.next >= 2) + if (m_vertex->next >= 2) { - const u32 count = m_vertex.next; + const u32 count = m_vertex->next; int i = static_cast(count) * 2 - 4; - GSVertex* s = &m_vertex.buff[count - 2]; - GSVertex* q = &m_vertex.buff[count * 2 - 4]; - u16* RESTRICT index = &m_index.buff[count * 3 - 6]; + GSVertex* s = &m_vertex->buff[count - 2]; + GSVertex* q = &m_vertex->buff[count * 2 - 4]; + u16* RESTRICT index = &m_index->buff[count * 3 - 6]; // Sprites are flat shaded, so the provoking vertex doesn't matter here. 
constexpr GSVector4i indices = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 0); @@ -287,19 +287,19 @@ void GSRendererHW::Lines2Sprites() std::memcpy(&index[4], &high, sizeof(high)); } - m_vertex.head = m_vertex.tail = m_vertex.next = count * 2; - m_index.tail = count * 3; + m_vertex->head = m_vertex->tail = m_vertex->next = count * 2; + m_index->tail = count * 3; } } void GSRendererHW::ExpandLineIndices() { - const u32 process_count = (m_index.tail + 7) / 8 * 8; + const u32 process_count = (m_index->tail + 7) / 8 * 8; constexpr u32 expansion_factor = 3; - m_index.tail *= expansion_factor; - GSVector4i* end = reinterpret_cast(m_index.buff); - GSVector4i* read = reinterpret_cast(m_index.buff + process_count); - GSVector4i* write = reinterpret_cast(m_index.buff + process_count * expansion_factor); + m_index->tail *= expansion_factor; + GSVector4i* end = reinterpret_cast(m_index->buff); + GSVector4i* read = reinterpret_cast(m_index->buff + process_count); + GSVector4i* write = reinterpret_cast(m_index->buff + process_count * expansion_factor); constexpr GSVector4i mask0 = GSVector4i::cxpr8(0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5); constexpr GSVector4i mask1 = GSVector4i::cxpr8(6, 7, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 8, 9); @@ -353,13 +353,13 @@ __fi bool GSRendererHW::Is8PixelReverseSprite(const GSVertex& v0, const GSVertex // Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex) { - pxAssert(m_vertex.next % 2 == 0); // Either sprites or an even number of triangles. + pxAssert(m_vertex->next % 2 == 0); // Either sprites or an even number of triangles. 
const bool recursive_draw = m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0; const bool sprites = m_vt.m_primclass == GS_SPRITE_CLASS; - u32 count = m_vertex.next; - GSVertex* v = &m_vertex.buff[0]; + u32 count = m_vertex->next; + GSVertex* v = &m_vertex->buff[0]; const GIFRegXYOFFSET& o = m_context->XYOFFSET; // Could be drawing upside down or just back to front on the actual verts. // Iterate through the sprites in order and find one to infer which channels are being shuffled. @@ -500,12 +500,12 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, // no need to adjust v[0] because it should already be correct. if (PRIM->FST) { - v[1].U = v[m_index.buff[m_index.tail - 1]].U; - v[1].V = v[m_index.buff[m_index.tail - 1]].V; + v[1].U = v[m_index->buff[m_index->tail - 1]].U; + v[1].V = v[m_index->buff[m_index->tail - 1]].V; } else { - v[1].ST = v[m_index.buff[m_index.tail - 1]].ST; + v[1].ST = v[m_index->buff[m_index->tail - 1]].ST; } } else @@ -526,8 +526,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } } m_r = r; - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; + m_vertex->head = m_vertex->tail = m_vertex->next = 2; + m_index->tail = 2; return; } @@ -640,8 +640,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } if (wi != count) { - count = m_vertex.head = m_vertex.tail = m_vertex.next = wi; - m_index.tail = wi; + count = m_vertex->head = m_vertex->tail = m_vertex->next = wi; + m_index->tail = wi; } } @@ -805,7 +805,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } } - if (m_index.tail == 0) + if (m_index->tail == 0) { GL_INS("HW: ConvertSpriteTextureShuffle: Culled all vertices; exiting."); return; @@ -896,17 +896,17 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_context->scissor.in.w /= 2; m_context->scissor.in.z *= 2; - v[1].XYZ.X = 
((v[m_index.buff[m_index.tail - 1]].XYZ.X - m_context->XYOFFSET.OFX) * 2) + m_context->XYOFFSET.OFX; - v[1].XYZ.Y = ((v[m_index.buff[m_index.tail - 1]].XYZ.Y - m_context->XYOFFSET.OFY) / 2) + m_context->XYOFFSET.OFY; + v[1].XYZ.X = ((v[m_index->buff[m_index->tail - 1]].XYZ.X - m_context->XYOFFSET.OFX) * 2) + m_context->XYOFFSET.OFX; + v[1].XYZ.Y = ((v[m_index->buff[m_index->tail - 1]].XYZ.Y - m_context->XYOFFSET.OFY) / 2) + m_context->XYOFFSET.OFY; - v[1].U = v[m_index.buff[m_index.tail - 1]].U * 2; - v[1].V = v[m_index.buff[m_index.tail - 1]].V / 2; + v[1].U = v[m_index->buff[m_index->tail - 1]].U * 2; + v[1].V = v[m_index->buff[m_index->tail - 1]].V / 2; - v[1].ST.S = v[m_index.buff[m_index.tail - 1]].ST.S * 2; - v[1].ST.T = v[m_index.buff[m_index.tail - 1]].ST.T / 2; + v[1].ST.S = v[m_index->buff[m_index->tail - 1]].ST.S * 2; + v[1].ST.T = v[m_index->buff[m_index->tail - 1]].ST.T / 2; - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; + m_vertex->head = m_vertex->tail = m_vertex->next = 2; + m_index->tail = 2; m_cached_ctx.TEX0.TBW *= 2; m_cached_ctx.FRAME.FBW *= 2; @@ -923,7 +923,7 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou return GSVector4(0.0f); } - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const float scale = tex->GetScale(); const bool linear = m_vt.IsRealLinear(); const int t_position = v[0].U; @@ -1009,7 +1009,7 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) // neither in a fast way. So instead let's just take the hypothesis that all sprites must have the same // size. // Tested on Tekken 5. 
- const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; bool is_paving = true; bool is_paving_h = true; bool is_paving_v = true; @@ -1018,7 +1018,7 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) const int first_dpU = v[1].U - v[0].U; const int first_dpY = v[1].XYZ.Y - v[0].XYZ.Y; const int first_dpV = v[1].V - v[0].V; - for (u32 i = 0; i < m_vertex.next; i += 2) + for (u32 i = 0; i < m_vertex->next; i += 2) { const int dpX = v[i + 1].XYZ.X - v[i].XYZ.X; const int dpU = v[i + 1].U - v[i].U; @@ -1043,14 +1043,14 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p; const GSVector4 delta_t = m_vt.m_max.t - m_vt.m_min.t; const bool is_blit = PrimitiveOverlap() == PRIM_OVERLAP_NO; - GL_INS("HW: PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex.tail); + GL_INS("HW: PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex->tail); #endif if (is_paving) { // Replace all sprite with a single fullscreen sprite. 
u32 unique_verts = 2; - GSVertex* s = &m_vertex.buff[0]; + GSVertex* s = &m_vertex->buff[0]; if (is_paving_h) { s[0].XYZ.X = static_cast((16.0f * m_vt.m_min.p.x) + m_context->XYOFFSET.OFX); @@ -1061,7 +1061,7 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) } else { - for (u32 i = 2; i < (m_vertex.tail & ~1); i++) + for (u32 i = 2; i < (m_vertex->tail & ~1); i++) { bool unique_found = false; @@ -1102,7 +1102,7 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) } else { - for (u32 i = 2; i < (m_vertex.tail & ~1); i++) + for (u32 i = 2; i < (m_vertex->tail & ~1); i++) { bool unique_found = false; @@ -1133,8 +1133,8 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) } } - m_vertex.head = m_vertex.tail = m_vertex.next = unique_verts; - m_index.tail = unique_verts; + m_vertex->head = m_vertex->tail = m_vertex->next = unique_verts; + m_index->tail = unique_verts; } } } @@ -1261,7 +1261,7 @@ bool GSRendererHW::IsPossibleChannelShuffle() const { if (!PRIM->TME || m_cached_ctx.TEX0.PSM != PSMT8 || // 8-bit texture draw m_vt.m_primclass != GS_SPRITE_CLASS || // draw_sprite_tex - (m_vertex.tail <= 2 && (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(8.0f)).mask() & 0x3) == 0x3)) // Powerdrome does a tiny shuffle on a couple of pixels, can't reliably translate this. + (m_vertex->tail <= 2 && (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(8.0f)).mask() & 0x3) == 0x3)) // Powerdrome does a tiny shuffle on a couple of pixels, can't reliably translate this. 
{ return false; } @@ -1269,7 +1269,7 @@ bool GSRendererHW::IsPossibleChannelShuffle() const const int mask = (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(64.0f)).mask() & 0x3); if (mask == 0x3) // single_page { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4; @@ -1291,7 +1291,7 @@ bool GSRendererHW::IsPossibleChannelShuffle() const if (m_cached_ctx.TEX0.TBW == (m_cached_ctx.FRAME.FBW * 2) && GSLocalMemory::IsPageAligned(m_cached_ctx.FRAME.PSM, GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p)))) { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4; @@ -1357,7 +1357,7 @@ bool GSRendererHW::IsSplitTextureShuffle(GIFRegTEX0& rt_TEX0, GSVector4i& valid_ return false; // Different channel being shuffled, so needs to be handled separately (misdetection in 50 Cent) - if (m_vertex.buff[m_index.buff[0]].U != m_v.U) + if (m_vertex->buff[m_index->buff[0]].U != m_v.U) return false; // Check that both the position and texture coordinates are page aligned, so we can work in pages instead of coordinates. @@ -1745,7 +1745,7 @@ bool GSRendererHW::IsDepthAlwaysPassing() // Depth is always pass/fail (no read) and write are discarded. return (!m_cached_ctx.TEST.ZTE || m_cached_ctx.TEST.ZTST <= ZTST_ALWAYS) || // Depth test will always pass - (m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[check_index].XYZ.Z, max_z) == max_z); + (m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex->buff[check_index].XYZ.Z, max_z) == max_z); } bool GSRendererHW::IsUsingCsInBlend() @@ -1784,7 +1784,7 @@ bool GSRendererHW::IsTBPFrameOrZ(u32 tbp, bool frame_only) // Depth is always pass/fail (no read) and write are discarded. 
(zm != 0 && m_cached_ctx.TEST.ZTST <= ZTST_ALWAYS) || // Depth test will always pass - (zm != 0 && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z) || + (zm != 0 && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex->buff[0].XYZ.Z, max_z) == max_z) || // Depth will be written through the RT (!no_rt && m_cached_ctx.FRAME.FBP == m_cached_ctx.ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && m_cached_ctx.TEST.ZTE)) || // No color or Z being written. @@ -1801,17 +1801,17 @@ void GSRendererHW::HandleManualDeswizzle() // Check if it's doing manual deswizzling first (draws are 32x16), if they are, check if the Z is flat, if not, // we're gonna have to get creative and swap around the quandrants, but that's a TODO. - GSVertex* v = &m_vertex.buff[0]; + GSVertex* v = &m_vertex->buff[0]; // Check for page quadrant and compare it to the quadrant from the verts, if it does match then we need to do correction. const GSVector2i page_quadrant = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs / 2; if (PRIM->FST) { - for (u32 i = 0; i < m_index.tail; i += 2) + for (u32 i = 0; i < m_index->tail; i += 2) { - const u32 index_first = m_index.buff[i]; - const u32 index_last = m_index.buff[i + 1]; + const u32 index_first = m_index->buff[i]; + const u32 index_last = m_index->buff[i + 1]; if ((abs((v[index_last].U) - (v[index_first].U)) >> 4) != page_quadrant.x || (abs((v[index_last].V) - (v[index_first].V)) >> 4) != page_quadrant.y) return; @@ -1819,10 +1819,10 @@ void GSRendererHW::HandleManualDeswizzle() } else { - for (u32 i = 0; i < m_index.tail; i += 2) + for (u32 i = 0; i < m_index->tail; i += 2) { - const u32 index_first = m_index.buff[i]; - const u32 index_last = m_index.buff[i + 1]; + const u32 index_first = m_index->buff[i]; + const u32 index_last = m_index->buff[i + 1]; const u32 x = abs(((v[index_last].ST.S / v[index_last].RGBAQ.Q) * (1 << m_context->TEX0.TW)) - ((v[index_first].ST.S / 
v[index_first].RGBAQ.Q) * (1 << m_context->TEX0.TW))); const u32 y = abs(((v[index_last].ST.T / v[index_last].RGBAQ.Q) * (1 << m_context->TEX0.TH)) - ((v[index_first].ST.T / v[index_first].RGBAQ.Q) * (1 << m_context->TEX0.TH))); @@ -2027,7 +2027,7 @@ void GSRendererHW::SwSpriteRender() const bool alpha_blending_enabled = NeedsBlending(); - const GSVertex& v = m_index.tail > 0 ? m_vertex.buff[m_index.buff[m_index.tail - 1]] : GSVertex(); // Last vertex if any. + const GSVertex& v = m_index->tail > 0 ? m_vertex->buff[m_index->buff[m_index->tail - 1]] : GSVertex(); // Last vertex if any. const GSVector4i vc = GSVector4i(v.RGBAQ.R, v.RGBAQ.G, v.RGBAQ.B, v.RGBAQ.A) // 0x000000AA000000BB000000GG000000RR .ps32(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR @@ -2156,10 +2156,10 @@ bool GSRendererHW::CanUseSwSpriteRender() return false; if (PRIM->PRIM != GS_TRIANGLESTRIP && PRIM->PRIM != GS_SPRITE) // Triangle strip or sprite draw return false; - if (m_vt.m_primclass == GS_TRIANGLE_CLASS && (PRIM->PRIM != GS_TRIANGLESTRIP || m_vertex.tail != 4)) // If triangle class, strip draw with 4 vertices (two prims, emulating single sprite prim) + if (m_vt.m_primclass == GS_TRIANGLE_CLASS && (PRIM->PRIM != GS_TRIANGLESTRIP || m_vertex->tail != 4)) // If triangle class, strip draw with 4 vertices (two prims, emulating single sprite prim) return false; // TODO If GS_TRIANGLESTRIP draw, check that the draw is axis aligned - if (m_vt.m_primclass == GS_SPRITE_CLASS && (PRIM->PRIM != GS_SPRITE || m_vertex.tail != 2)) // If sprite class, sprite draw with 2 vertices (one prim) + if (m_vt.m_primclass == GS_SPRITE_CLASS && (PRIM->PRIM != GS_SPRITE || m_vertex->tail != 2)) // If sprite class, sprite draw with 2 vertices (one prim) return false; if (m_cached_ctx.DepthRead() || m_cached_ctx.DepthWrite()) // No depth handling return false; @@ -2203,8 +2203,8 @@ void GSRendererHW::RoundSpriteOffset() #if defined(DEBUG_V) || defined(DEBUG_U) bool debug = linear; #endif - const u32 count = m_vertex.next; - 
GSVertex* v = &m_vertex.buff[0]; + const u32 count = m_vertex->next; + GSVertex* v = &m_vertex->buff[0]; for (u32 i = 0; i < count; i += 2) { @@ -2333,7 +2333,7 @@ void GSRendererHW::Draw() m_cached_ctx.FRAME = context->FRAME; m_cached_ctx.ZBUF = context->ZBUF; - if (IsBadFrame()) + if (IsBadFrame()) { GL_INS("HW: Warning skipping a draw call (%d)", s_n); return; @@ -2807,7 +2807,7 @@ void GSRendererHW::Draw() } const u32 vert_index = (m_vt.m_primclass == GS_TRIANGLE_CLASS) ? 2 : 1; - u32 const_color = m_vertex.buff[m_index.buff[vert_index]].RGBAQ.U32[0]; + u32 const_color = m_vertex->buff[m_index->buff[vert_index]].RGBAQ.U32[0]; u32 fb_mask = m_cached_ctx.FRAME.FBMSK; // If we could just check the colour, it would be great, but Echo Night decided it's going to set the alpha and green to 128, for some reason, and actually be 32bit, so it ruined my day. @@ -2843,7 +2843,7 @@ void GSRendererHW::Draw() m_cached_ctx.TEXA.TA0 = 0; m_cached_ctx.TEXA.TA1 = 128; m_cached_ctx.FRAME.PSM = (m_cached_ctx.FRAME.PSM & 2) ? m_cached_ctx.FRAME.PSM : PSMCT16; - m_vertex.buff[m_index.buff[1]].RGBAQ.U32[0] = const_color; + m_vertex->buff[m_index->buff[1]].RGBAQ.U32[0] = const_color; ReplaceVerticesWithSprite(m_r, GSVector2i(m_r.width(), m_r.height())); } @@ -3060,13 +3060,13 @@ void GSRendererHW::Draw() const u32 page_alignment = GSLocalMemory::IsPageAlignedMasked(m_cached_ctx.TEX0.PSM, m_r); const bool page_aligned = (page_alignment & 0xF0F0) != 0; // Make sure Y is page aligned. 
if (!no_rt && page_aligned && m_cached_ctx.ZBUF.ZMSK && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && - (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true) && m_index.tail > 6))) + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index->tail % 6) == 0 && TrianglesAreQuads(true) && m_index->tail > 6))) { // Tail check is to make sure we have enough strips to go all the way across the page, or if it's using a region clamp could be used to draw strips. if (GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && - (m_index.tail >= (m_cached_ctx.TEX0.TBW * 2) || m_cached_ctx.TEX0.TBP0 == m_cached_ctx.FRAME.Block() || m_cached_ctx.CLAMP.WMS > CLAMP_CLAMP || m_cached_ctx.CLAMP.WMT > CLAMP_CLAMP)) + (m_index->tail >= (m_cached_ctx.TEX0.TBW * 2) || m_cached_ctx.TEX0.TBP0 == m_cached_ctx.FRAME.Block() || m_cached_ctx.CLAMP.WMS > CLAMP_CLAMP || m_cached_ctx.CLAMP.WMT > CLAMP_CLAMP)) { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int first_x = std::clamp((static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4, 0, 2048); const bool offset_last = PRIM->FST ? (v[1].U > v[0].U) : ((v[1].ST.S / v[1].RGBAQ.Q) > (v[0].ST.S / v[1].RGBAQ.Q)); @@ -3092,11 +3092,11 @@ void GSRendererHW::Draw() { bool shuffle_channel_reads = !m_cached_ctx.FRAME.FBMSK; const u32 increment = (m_vt.m_primclass == GS_TRIANGLE_CLASS) ? 3 : 2; - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; if (shuffle_channel_reads) { - for (u32 i = 0; i < m_index.tail; i += increment) + for (u32 i = 0; i < m_index->tail; i += increment) { const int first_u = (PRIM->FST ? v[i].U : static_cast(v[i].ST.S / v[(increment == 2) ? i + 1 : i].RGBAQ.Q)) >> 4; const int second_u = (PRIM->FST ? 
v[i + 1].U : static_cast(v[i + 1].ST.S / v[i + 1].RGBAQ.Q)) >> 4; @@ -3243,7 +3243,6 @@ void GSRendererHW::Draw() float target_scale = GetTextureScaleFactor(); bool scaled_copy = false; int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); - m_downscale_source = false; if (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off) { @@ -3277,6 +3276,8 @@ void GSRendererHW::Draw() scale_draw = 1; scaled_copy = true; } + + m_downscale_source = false; } } @@ -3445,9 +3446,9 @@ void GSRendererHW::Draw() if (vertical_offset || horizontal_offset) { - GSVertex* v = &m_vertex.buff[0]; + GSVertex* v = &m_vertex->buff[0]; - for (u32 i = 0; i < m_vertex.tail; i++) + for (u32 i = 0; i < m_vertex->tail; i++) { v[i].XYZ.X += horizontal_offset << 4; v[i].XYZ.Y += vertical_offset << 4; @@ -3742,9 +3743,9 @@ void GSRendererHW::Draw() if (vertical_offset || horizontal_offset) { - GSVertex* v = &m_vertex.buff[0]; + GSVertex* v = &m_vertex->buff[0]; - for (u32 i = 0; i < m_vertex.tail; i++) + for (u32 i = 0; i < m_vertex->tail; i++) { v[i].XYZ.X += horizontal_offset << 4; v[i].XYZ.Y += vertical_offset << 4; @@ -4031,7 +4032,7 @@ void GSRendererHW::Draw() if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; if (rt) { @@ -4053,7 +4054,7 @@ void GSRendererHW::Draw() // Both input and output are 16 bits and texture was initially 32 bits! Same for the target, Sonic Unleash makes a new target which really is 16bit. 
m_texture_shuffle = ((m_same_group_texture_shuffle || (tex_psm.bpp == 16)) && (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) && (shuffle_coords || rt->m_32_bits_fmt)) && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle) && - (draw_sprite_tex || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true))); + (draw_sprite_tex || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index->tail % 6) == 0 && TrianglesAreQuads(true))); if (m_texture_shuffle && IsSplitTextureShuffle(rt->m_TEX0, rt->m_valid)) { @@ -4221,13 +4222,13 @@ void GSRendererHW::Draw() if (!m_texture_shuffle && !m_channel_shuffle) { // Try to turn blits in to single sprites, saves upscaling problems when striped clears/blits. - if (m_vt.m_primclass == GS_SPRITE_CLASS && m_primitive_covers_without_gaps == NoGapsType::FullCover && m_index.tail > 2 && (!PRIM->TME || TextureCoversWithoutGapsNotEqual()) && m_vt.m_eq.rgba == 0xFFFF) + if (m_vt.m_primclass == GS_SPRITE_CLASS && m_primitive_covers_without_gaps == NoGapsType::FullCover && m_index->tail > 2 && (!PRIM->TME || TextureCoversWithoutGapsNotEqual()) && m_vt.m_eq.rgba == 0xFFFF) { // Full final framebuffer only. 
const GSVector2i fb_size = PCRTCDisplays.GetFramebufferSize(-1); if (std::abs(fb_size.x - m_r.width()) <= 1 && std::abs(fb_size.y - m_r.height()) <= 1) { - GSVertex* v = m_vertex.buff; + GSVertex* v = m_vertex->buff; v[0].XYZ.Z = v[1].XYZ.Z; v[0].RGBAQ = v[1].RGBAQ; @@ -4236,23 +4237,23 @@ void GSRendererHW::Draw() m_vt.m_eq.z = true; m_vt.m_eq.f = true; - v[1].XYZ.X = v[m_index.tail - 1].XYZ.X; - v[1].XYZ.Y = v[m_index.tail - 1].XYZ.Y; + v[1].XYZ.X = v[m_index->tail - 1].XYZ.X; + v[1].XYZ.Y = v[m_index->tail - 1].XYZ.Y; if (PRIM->FST) { - v[1].U = v[m_index.tail - 1].U; - v[1].V = v[m_index.tail - 1].V; + v[1].U = v[m_index->tail - 1].U; + v[1].V = v[m_index->tail - 1].V; } else { - v[1].ST.S = v[m_index.tail - 1].ST.S; - v[1].ST.T = v[m_index.tail - 1].ST.T; - v[1].RGBAQ.Q = v[m_index.tail - 1].RGBAQ.Q; + v[1].ST.S = v[m_index->tail - 1].ST.S; + v[1].ST.T = v[m_index->tail - 1].ST.T; + v[1].RGBAQ.Q = v[m_index->tail - 1].RGBAQ.Q; } - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; + m_vertex->head = m_vertex->tail = m_vertex->next = 2; + m_index->tail = 2; } } @@ -4717,8 +4718,8 @@ void GSRendererHW::Draw() // but it still needs to adjust native stuff from memory as it's not been compensated for upscaling (Dragon Quest 8 font for example). 
if (CanUpscale() && (m_vt.m_primclass == GS_SPRITE_CLASS) && rt && rt->GetScale() > 1.0f) { - const u32 count = m_vertex.next; - GSVertex* v = &m_vertex.buff[0]; + const u32 count = m_vertex->next; + GSVertex* v = &m_vertex->buff[0]; // Hack to avoid vertical black line in various games (ace combat/tekken) if (GSConfig.UserHacks_AlignSpriteX) @@ -4944,30 +4945,30 @@ bool GSRendererHW::VerifyIndices() switch (m_vt.m_primclass) { case GS_SPRITE_CLASS: - if (m_index.tail % 2 != 0) + if (m_index->tail % 2 != 0) return false; [[fallthrough]]; case GS_POINT_CLASS: // Expect indices to be flat increasing - for (u32 i = 0; i < m_index.tail; i++) + for (u32 i = 0; i < m_index->tail; i++) { - if (m_index.buff[i] != i) + if (m_index->buff[i] != i) return false; } break; case GS_LINE_CLASS: - if (m_index.tail % 2 != 0) + if (m_index->tail % 2 != 0) return false; // Expect each line to be a pair next to each other // VS expand relies on this! - for (u32 i = 0; i < m_index.tail; i += 2) + for (u32 i = 0; i < m_index->tail; i += 2) { - if (m_index.buff[i] + 1 != m_index.buff[i + 1]) + if (m_index->buff[i] + 1 != m_index->buff[i + 1]) return false; } break; case GS_TRIANGLE_CLASS: - if (m_index.tail % 3 != 0) + if (m_index->tail % 3 != 0) return false; break; case GS_INVALID_CLASS: @@ -4992,9 +4993,9 @@ void GSRendererHW::HandleProvokingVertexFirst() // If all first/last vertices have the same color there is nothing to do. 
bool first_eq_last = true; - for (u32 i = 0; i < m_index.tail; i += n) + for (u32 i = 0; i < m_index->tail; i += n) { - if (m_vertex.buff[m_index.buff[i]].RGBAQ.U32[0] != m_vertex.buff[m_index.buff[i + n - 1]].RGBAQ.U32[0]) + if (m_vertex->buff[m_index->buff[i]].RGBAQ.U32[0] != m_vertex->buff[m_index->buff[i + n - 1]].RGBAQ.U32[0]) { first_eq_last = false; break; @@ -5004,21 +5005,21 @@ void GSRendererHW::HandleProvokingVertexFirst() return; // De-index the vertices using the copy buffer - while (m_vertex.maxcount < m_index.tail) + while (m_vertex->maxcount < m_index->tail) GrowVertexBuffer(); - for (int i = static_cast(m_index.tail) - 1; i >= 0; i--) + for (int i = static_cast(m_index->tail) - 1; i >= 0; i--) { - m_vertex.buff_copy[i] = m_vertex.buff[m_index.buff[i]]; - m_index.buff[i] = static_cast(i); + m_vertex->buff_copy[i] = m_vertex->buff[m_index->buff[i]]; + m_index->buff[i] = static_cast(i); } - std::swap(m_vertex.buff, m_vertex.buff_copy); - m_vertex.head = m_vertex.next = m_vertex.tail = m_index.tail; + std::swap(m_vertex->buff, m_vertex->buff_copy); + m_vertex->head = m_vertex->next = m_vertex->tail = m_index->tail; // Put correct color in the first vertex - for (u32 i = 0; i < m_index.tail; i += n) + for (u32 i = 0; i < m_index->tail; i += n) { - m_vertex.buff[i].RGBAQ.U32[0] = m_vertex.buff[i + n - 1].RGBAQ.U32[0]; - m_vertex.buff[i + n - 1].RGBAQ.U32[0] = 0xff; // Make last vertex red for debugging if used improperly + m_vertex->buff[i].RGBAQ.U32[0] = m_vertex->buff[i + n - 1].RGBAQ.U32[0]; + m_vertex->buff[i + n - 1].RGBAQ.U32[0] = 0xff; // Make last vertex red for debugging if used improperly } } @@ -5028,8 +5029,8 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert if (GSConfig.UserHacks_ForceEvenSpritePosition && !m_isPackedUV_HackFlag && m_process_texture && PRIM->FST) { - for (u32 i = 0; i < m_vertex.next; i++) - m_vertex.buff[i].UV &= 0x3FEF3FEF; + for (u32 i = 0; i < m_vertex->next; i++) + m_vertex->buff[i].UV 
&= 0x3FEF3FEF; } const bool unscale_pt_ln = !GSConfig.UserHacks_DisableSafeFeatures && (target_scale != 1.0f); @@ -5055,9 +5056,9 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point; m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.verts = m_vertex.buff; - m_conf.nverts = m_vertex.next; - m_conf.nindices = m_index.tail * 6; + m_conf.verts = m_vertex->buff; + m_conf.nverts = m_vertex->next; + m_conf.nindices = m_index->tail * 6; m_conf.indices_per_prim = 6; return; } @@ -5106,19 +5107,19 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert if (req_vert_backup) { - memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); - memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + memcpy(m_draw_vertex.buff, m_vertex->buff, sizeof(GSVertex) * m_vertex->next); + memcpy(m_draw_index.buff, m_index->buff, sizeof(u16) * m_index->tail); m_conf.verts = m_draw_vertex.buff; m_conf.indices = m_draw_index.buff; } else { - m_conf.verts = m_vertex.buff; - m_conf.indices = m_index.buff; + m_conf.verts = m_vertex->buff; + m_conf.indices = m_index->buff; } - m_conf.nverts = m_vertex.next; - m_conf.nindices = m_index.tail * 3; + m_conf.nverts = m_vertex->next; + m_conf.nindices = m_index->tail * 3; m_conf.indices_per_prim = 6; return; } @@ -5140,9 +5141,9 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert // See note above in GS_SPRITE_CLASS. 
if (m_vt.m_accurate_stq && m_vt.m_eq.stq) [[unlikely]] { - GSVertex* const v = m_vertex.buff; + GSVertex* const v = m_vertex->buff; const GSVector4 v_q = GSVector4(v[0].RGBAQ.Q); - for (u32 i = 0; i < m_vertex.next; i++) + for (u32 i = 0; i < m_vertex->next; i++) { // v[i].ST.ST /= v[i].RGBAQ.Q; v[i].RGBAQ.Q = 1.0f; (Q / Q = 1) GSVector4 v_st = GSVector4::load(&v[i].ST); @@ -5159,19 +5160,19 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert if (req_vert_backup) { - memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); - memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + memcpy(m_draw_vertex.buff, m_vertex->buff, sizeof(GSVertex) * m_vertex->next); + memcpy(m_draw_index.buff, m_index->buff, sizeof(u16) * m_index->tail); m_conf.verts = m_draw_vertex.buff; m_conf.indices = m_draw_index.buff; } else { - m_conf.verts = m_vertex.buff; - m_conf.indices = m_index.buff; + m_conf.verts = m_vertex->buff; + m_conf.indices = m_index->buff; } - m_conf.nverts = m_vertex.next; - m_conf.nindices = m_index.tail; + m_conf.nverts = m_vertex->next; + m_conf.nindices = m_index->tail; } void GSRendererHW::EmulateZbuffer(const GSTextureCache::Target* ds) @@ -5230,7 +5231,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS ConvertSpriteTextureShuffle(process_rg, process_ba, shuffle_across, rt, tex); - if (m_index.tail == 0) + if (m_index->tail == 0) return; // Rewriting sprites can result in an empty draw. // If date is enabled you need to test the green channel instead of the alpha channel. @@ -5418,7 +5419,7 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index->tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. 
I don't want to add more CRC stuff. So @@ -5435,7 +5436,7 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t } else if (m_cached_ctx.CLAMP.WMS == 3 && ((m_cached_ctx.CLAMP.MAXU & 0x8) == 8)) { - const ChannelFetch channel_select = ((m_cached_ctx.CLAMP.WMT != 3 && (m_vertex.buff[m_index.buff[0]].V & 0x20) == 0) || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA; + const ChannelFetch channel_select = ((m_cached_ctx.CLAMP.WMT != 3 && (m_vertex->buff[m_index->buff[0]].V & 0x20) == 0) || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA; // MGS3/Kill Zone if (test_only) @@ -5449,7 +5450,7 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t { // Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so // red. 2-3 is likely bottom so green (actually depends on texture base pointer offset) - const bool green = (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 2)) || (PRIM->FST && (m_vertex.buff[0].V & 32)); + const bool green = (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 2)) || (PRIM->FST && (m_vertex->buff[0].V & 32)); if (green && (m_cached_ctx.FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) { // Typically used in Terminator 3 @@ -5582,7 +5583,7 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t m_channel_shuffle_src_valid = src->m_valid; if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || ((src->m_TEX0.TBW == rt->m_TEX0.TBW) && (!m_in_target_draw && IsPageCopy())) || m_conf.ps.urban_chaos_hle || m_conf.ps.tales_of_abyss_hle) { - GSVertex* s = &m_vertex.buff[0]; + GSVertex* s = &m_vertex->buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); @@ 
-5624,7 +5625,7 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t } m_in_target_draw |= frame_page_offset > 0; - GSVertex* s = &m_vertex.buff[0]; + GSVertex* s = &m_vertex->buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); @@ -5656,8 +5657,8 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t m_channel_shuffle_finish = true; } - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; + m_vertex->head = m_vertex->tail = m_vertex->next = 2; + m_index->tail = 2; m_primitive_covers_without_gaps = NoGapsType::FullCover; m_conf.cb_ps.ChannelShuffleOffset = GSVector2(0, 0); @@ -6601,7 +6602,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, if (can_offset && tex->m_scale > 1.0f) { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; if (PRIM->FST) { const int x1_frac = ((v[1].XYZ.X - m_context->XYOFFSET.OFX) & 0xf); @@ -6672,7 +6673,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, if (can_offset && tex->m_scale > 1.0f) { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; if (PRIM->FST) { const int x1_frac = ((v[1].XYZ.X - m_context->XYOFFSET.OFX) & 0xf); @@ -6695,7 +6696,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, } } - if (m_vt.m_primclass == GS_SPRITE_CLASS && m_index.tail >= 4 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && + if (m_vt.m_primclass == GS_SPRITE_CLASS && m_index->tail >= 4 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && ((tex->m_from_target_TEX0.PSM & 0x30) == 0x30 || GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0)) { HandleManualDeswizzle(); @@ -7381,7 +7382,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* 
rt, GSTextureCache::Ta if (rt) { EmulateTextureShuffleAndFbmask(rt, tex); - if (m_index.tail == 0) + if (m_index->tail == 0) { GL_INS("HW: DrawPrims: Texture shuffle emulation culled all vertices; exiting."); return; @@ -7579,7 +7580,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta GL_PERF("DATE: Fast with alpha %d-%d", GetAlphaMinMax().min, GetAlphaMinMax().max); DATE_one = true; } - else if (features.texture_barrier && ((m_vt.m_primclass == GS_SPRITE_CLASS && ComputeDrawlistGetSize(rt->m_scale) < 10) || (m_index.tail < 30))) + else if (features.texture_barrier && ((m_vt.m_primclass == GS_SPRITE_CLASS && ComputeDrawlistGetSize(rt->m_scale) < 10) || (m_index->tail < 30))) { // texture barrier will split the draw call into n draw call. It is very efficient for // few primitive draws. Otherwise it sucks. @@ -7994,7 +7995,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // This kinda screws things up when using ST, so let's not. if (m_vt.m_primclass == GS_SPRITE_CLASS && rtscale > 1.0f && (tex && PRIM->FST)) { - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int x1_frac = ((v[1].XYZ.X - m_context->XYOFFSET.OFX) & 0xf); const int y1_frac = ((v[1].XYZ.Y - m_context->XYOFFSET.OFY) & 0xf); if (x1_frac & 8) @@ -8548,7 +8549,7 @@ bool GSRendererHW::DetectStripedDoubleClear(bool& no_rt, bool& no_ds) !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.PSM & 0x30) != (m_cached_ctx.ZBUF.PSM & 0x30) && (m_cached_ctx.FRAME.PSM & 0xF) == (m_cached_ctx.ZBUF.PSM & 0xF) && m_vt.m_eq.z == 1 && - m_vertex.buff[1].XYZ.Z == m_vertex.buff[1].RGBAQ.U32[0]; + m_vertex->buff[1].XYZ.Z == m_vertex->buff[1].RGBAQ.U32[0]; // Z and color must be constant and the same and must be drawing strips. 
if (!z_is_frame || m_vt.m_eq.rgba != 0xFFFF) @@ -8563,12 +8564,12 @@ bool GSRendererHW::DetectStripedDoubleClear(bool& no_rt, bool& no_ds) // LOTR has 4096 verts, so this isn't going to be super fast on that game, most games will be just 16 verts so they should be ok, // and I could cheat and stop when we get a size that matches, but that might be a lucky misdetection, I don't wanna risk it. int vertex_offset = 0; - int last_vertex = m_vertex.buff[0].XYZ.X; + int last_vertex = m_vertex->buff[0].XYZ.X; - for (u32 i = 1; i < m_vertex.tail; i++) + for (u32 i = 1; i < m_vertex->tail; i++) { - vertex_offset = std::max(static_cast((m_vertex.buff[i].XYZ.X - last_vertex) >> 4), vertex_offset); - last_vertex = m_vertex.buff[i].XYZ.X; + vertex_offset = std::max(static_cast((m_vertex->buff[i].XYZ.X - last_vertex) >> 4), vertex_offset); + last_vertex = m_vertex->buff[i].XYZ.X; // Found a gap which is much bigger, no point continuing to scan. if (vertex_offset > strip_size) @@ -8928,7 +8929,7 @@ bool GSRendererHW::TryTargetClear(GSTextureCache::Target* rt, GSTextureCache::Ta if (ds && !preserve_depth && m_r.rintersect(ds->m_valid).eq(ds->m_valid)) { const u32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8); - const u32 z = std::min(max_z, m_vertex.buff[1].XYZ.Z); + const u32 z = std::min(max_z, m_vertex->buff[1].XYZ.Z); const float d = static_cast(z) * 0x1p-32f; GL_INS("HW: TryTargetClear(): DS at %x <= %f", ds->m_TEX0.TBP0, d); g_gs_device->ClearDepth(ds->m_texture, d); @@ -8994,7 +8995,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r if (!no_ds && !preserve_z) { - ClearGSLocalMemory(m_context->offset.zb, m_r, m_vertex.buff[1].XYZ.Z); + ClearGSLocalMemory(m_context->offset.zb, m_r, m_vertex->buff[1].XYZ.Z); if (invalidate_z) { @@ -9169,7 +9170,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, 
const GSVector4i& r_draw) { // Not required when using Tex in RT - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && + if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex->next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0 && GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled) { GL_PUSH("HW: OI_BlitFMV"); @@ -9284,12 +9285,12 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual() } // Simple case: one sprite. - if (m_index.tail == 2) + if (m_index->tail == 2) { return true; } - const GSVertex* v = &m_vertex.buff[0]; + const GSVertex* v = &m_vertex->buff[0]; const int first_dpY = v[1].XYZ.Y - v[0].XYZ.Y; const int first_dpX = v[1].XYZ.X - v[0].XYZ.X; const int first_dtV = v[1].V - v[0].V; @@ -9299,7 +9300,7 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual() if ((first_dpX >> 4) == m_r.z) { // Borrowed from MergeSprite() modified to calculate heights. - for (u32 i = 2; i < m_vertex.next; i += 2) + for (u32 i = 2; i < m_vertex->next; i += 2) { const int last_tV = v[i - 1].V; const int dtV = v[i + 1].V - v[i].V; @@ -9317,7 +9318,7 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual() if ((first_dpY >> 4) == m_r.w) { // Borrowed from MergeSprite(). 
- for (u32 i = 2; i < m_vertex.next; i += 2) + for (u32 i = 2; i < m_vertex->next; i += 2) { const int last_tU = v[i - 1].U; const int this_start_U = v[i].U; @@ -9367,7 +9368,7 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) return 0; const bool no_resize = (std::abs(draw_size.x - tex_size.x) <= 1 && std::abs(draw_size.y - tex_size.y) <= 1); - const bool can_maintain = no_resize || (!is_target_src && m_index.tail == 2); + const bool can_maintain = no_resize || (!is_target_src && m_index->tail == 2); if (!src || ((!is_target_src || (src->m_from_target->m_downscaled || GSConfig.UserHacks_NativeScaling > GSNativeScaling::Aggressive)) && can_maintain)) return -1; @@ -9403,9 +9404,9 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) } // Last ditched check if it's doing a lot of small draws exactly the same which could be recursive lighting bloom. - if (m_vt.m_primclass == GS_SPRITE_CLASS && m_index.tail > 2 && !no_gaps_or_single_sprite && m_context->TEX1.MMAG == 1 && !m_context->ALPHA.IsOpaque()) + if (m_vt.m_primclass == GS_SPRITE_CLASS && m_index->tail > 2 && !no_gaps_or_single_sprite && m_context->TEX1.MMAG == 1 && !m_context->ALPHA.IsOpaque()) { - GSVertex* v = &m_vertex.buff[0]; + GSVertex* v = &m_vertex->buff[0]; float tw = 1 << src->m_TEX0.TW; float th = 1 << src->m_TEX0.TH; @@ -9416,7 +9417,7 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) if (first_x > first_u && first_y > first_v && !no_resize && std::abs(draw_size.x - first_x) <= 4 && std::abs(draw_size.y - first_y) <= 4) { - for (u32 i = 2; i < m_index.tail; i += 2) + for (u32 i = 2; i < m_index->tail; i += 2) { const int next_u = (PRIM->FST) ? (v[i + 1].U - v[i].U) >> 4 : std::floor(static_cast(tw * v[i + 1].ST.S) - static_cast(tw * v[i].ST.S)); const int next_v = (PRIM->FST) ? 
(v[i + 1].V - v[i].V) >> 4 : std::floor(static_cast(th * v[i + 1].ST.T) - static_cast(th * v[i].ST.T)); @@ -9429,7 +9430,7 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) if (next_u != first_u || next_v != first_v || next_x != first_x || next_y != first_y) break; - if (i + 2 >= m_index.tail) + if (i + 2 >= m_index->tail) return 2; } } @@ -9440,13 +9441,13 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) ClearType GSRendererHW::IsConstantDirectWriteMemClear() { - const bool direct_draw = (m_vt.m_primclass == GS_SPRITE_CLASS) || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads()); + const bool direct_draw = (m_vt.m_primclass == GS_SPRITE_CLASS) || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index->tail % 6) == 0 && TrianglesAreQuads()); // Constant Direct Write without texture/test/blending (aka a GS mem clear) if (direct_draw && !PRIM->TME // Direct write && !(m_draw_env->SCANMSK.MSK & 2) && !m_cached_ctx.TEST.ATE // no alpha test && !m_cached_ctx.TEST.DATE // no destination alpha test && (!m_cached_ctx.TEST.ZTE || m_cached_ctx.TEST.ZTST == ZTST_ALWAYS) // no depth test - && (m_vt.m_eq.rgba == 0xFFFF || m_vertex.next == 2) // constant color write + && (m_vt.m_eq.rgba == 0xFFFF || m_vertex->next == 2) // constant color write && (!PRIM->FGE || m_vt.m_min.p.w == 255.0f)) // No fog effect { if ((PRIM->ABE && !m_context->ALPHA.IsOpaque()) || (m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk)) @@ -9461,7 +9462,7 @@ u32 GSRendererHW::GetConstantDirectWriteMemClearColor() const { // Take the vertex colour, but check if the blending would make it black. const u32 vert_index = (m_vt.m_primclass == GS_TRIANGLE_CLASS) ? 
2 : 1; - u32 vert_color = m_vertex.buff[m_index.buff[vert_index]].RGBAQ.U32[0]; + u32 vert_color = m_vertex->buff[m_index->buff[vert_index]].RGBAQ.U32[0]; if (PRIM->ABE && m_context->ALPHA.IsBlack()) vert_color &= 0xFF000000u; @@ -9482,7 +9483,7 @@ u32 GSRendererHW::GetConstantDirectWriteMemClearColor() const u32 GSRendererHW::GetConstantDirectWriteMemClearDepth() const { const u32 max_z = (0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8)); - return std::min(m_vertex.buff[1].XYZ.Z, max_z); + return std::min(m_vertex->buff[1].XYZ.Z, max_z); } bool GSRendererHW::IsReallyDithered() const @@ -9504,7 +9505,7 @@ void GSRendererHW::ReplaceVerticesWithSprite(const GSVector4i& unscaled_rect, co { const GSVector4i fpr = unscaled_rect.sll32<4>(); const GSVector4i fpuv = unscaled_uv_rect.sll32<4>(); - GSVertex* v = m_vertex.buff; + GSVertex* v = m_vertex->buff; v[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + fpr.x); v[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + fpr.y); @@ -9546,8 +9547,8 @@ void GSRendererHW::ReplaceVerticesWithSprite(const GSVector4i& unscaled_rect, co m_vt.m_eq.z = true; m_vt.m_eq.f = true; - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; + m_vertex->head = m_vertex->tail = m_vertex->next = 2; + m_index->tail = 2; m_r = unscaled_rect; m_context->scissor.in = scissor; @@ -9573,10 +9574,10 @@ void GSRendererHW::OffsetDraw(s32 fbp_offset, s32 zbp_offset, s32 xoffset, s32 y const s32 fp_xoffset = xoffset << 4; const s32 fp_yoffset = yoffset << 4; - for (u32 i = 0; i < m_vertex.next; i++) + for (u32 i = 0; i < m_vertex->next; i++) { - m_vertex.buff[i].XYZ.X += fp_xoffset; - m_vertex.buff[i].XYZ.Y += fp_yoffset; + m_vertex->buff[i].XYZ.X += fp_xoffset; + m_vertex->buff[i].XYZ.Y += fp_yoffset; } } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp index 1a3b99ce61..79fc424955 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp +++ 
b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp @@ -39,21 +39,21 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b GSRasterizerData data; GSScanlineGlobalData& gd = data.global; - hw.m_sw_vertex_buffer.resize(((hw.m_vertex.next + 1) & ~1)); + hw.m_sw_vertex_buffer.resize(((hw.m_vertex->next + 1) & ~1)); data.primclass = vt.m_primclass; data.buff = nullptr; data.vertex = hw.m_sw_vertex_buffer.data(); - data.vertex_count = hw.m_vertex.next; - data.index = hw.m_index.buff; - data.index_count = hw.m_index.tail; + data.vertex_count = hw.m_vertex->next; + data.index = hw.m_index->buff; + data.index_count = hw.m_index->tail; data.scanmsk_value = env.SCANMSK.MSK; // Skip per pixel division if q is constant. // Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !vt.m_eq.q. // If you have both GS_SPRITE_CLASS && vt.m_eq.q, it will depends on the first part of the 'OR'. const u32 q_div = !hw.IsMipMapActive() && ((vt.m_eq.q && vt.m_min.t.z != 1.0f) || (!vt.m_eq.q && vt.m_primclass == GS_SPRITE_CLASS)); - GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next); + GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex->buff, hw.m_vertex->next); GSVector4i scissor = context->scissor.in; GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil())).rintersect(scissor); @@ -524,12 +524,12 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b const u32 ofx = context->XYOFFSET.OFX; - for (int i = 0, j = hw.m_vertex.tail; i < j; i++) + for (int i = 0, j = hw.m_vertex->tail; i < j; i++) { #if _M_SSE >= 0x501 - if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 + if ((((hw.m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 #else - if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 + if 
((((hw.m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 #endif { gd.sel.notest = 0; diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index 2c53496961..7dec91d06c 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -320,14 +320,14 @@ void GSRendererSW::RewriteVerticesIfSTOverflow() constexpr int n = GSUtil::GetClassVertexCount(primclass); // Make sure the copy buffer is large enough. - while (m_vertex.maxcount < m_index.tail) + while (m_vertex->maxcount < m_index->tail) GrowVertexBuffer(); - GSVertex* RESTRICT vertex = m_vertex.buff; - GSVertex* RESTRICT vertex_copy = m_vertex.buff_copy; - u16* RESTRICT index = m_index.buff; + GSVertex* RESTRICT vertex = m_vertex->buff; + GSVertex* RESTRICT vertex_copy = m_vertex->buff_copy; + u16* RESTRICT index = m_index->buff; - for (int i = 0; i < static_cast(m_index.tail); i += n) + for (int i = 0; i < static_cast(m_index->tail); i += n) { GSVector4 stcq[n]; @@ -381,18 +381,18 @@ void GSRendererSW::RewriteVerticesIfSTOverflow() } // Swap the buffers and fix the counts. - std::swap(m_vertex.buff, m_vertex.buff_copy); - m_vertex.head = m_vertex.next = m_vertex.tail = m_index.tail; + std::swap(m_vertex->buff, m_vertex->buff_copy); + m_vertex->head = m_vertex->next = m_vertex->tail = m_index->tail; // Recalculate ST min/max/eq in the vertex trace. GSVector4 tmin = GSVector4::cxpr(FLT_MAX); GSVector4 tmax = GSVector4::cxpr(-FLT_MAX); - for (int i = 0; i < static_cast(m_index.tail); i += n) + for (int i = 0; i < static_cast(m_index->tail); i += n) { for (int j = 0; j < n; j++) { - GSVector4 stcq = GSVector4::cast(GSVector4i(m_vertex.buff[i + j].m[0])); - const float Q = (primclass == GS_SPRITE_CLASS) ? stcq.w : m_vertex.buff[i + 1].RGBAQ.Q; + GSVector4 stcq = GSVector4::cast(GSVector4i(m_vertex->buff[i + j].m[0])); + const float Q = (primclass == GS_SPRITE_CLASS) ? 
stcq.w : m_vertex->buff[i + 1].RGBAQ.Q; stcq = (stcq / Q).xyzw(stcq); tmin = tmin.min(stcq); @@ -451,11 +451,11 @@ void GSRendererSW::Draw() SharedData* sd = static_cast(data.get()); sd->primclass = m_vt.m_primclass; - sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64); + sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex->next + 1) & ~1) + sizeof(u32) * m_index->tail, 64); sd->vertex = (GSVertexSW*)sd->buff; - sd->vertex_count = m_vertex.next; - sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1)); - sd->index_count = m_index.tail; + sd->vertex_count = m_vertex->next; + sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex->next + 1) & ~1)); + sd->index_count = m_index->tail; sd->scanmsk_value = m_draw_env->SCANMSK.MSK; // skip per pixel division if q is constant. @@ -463,9 +463,9 @@ void GSRendererSW::Draw() // If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR' u32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS)); - GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next); + GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex->buff, m_vertex->next); - std::memcpy(sd->index, m_index.buff, sizeof(u16) * m_index.tail); + std::memcpy(sd->index, m_index->buff, sizeof(u16) * m_index->tail); GSVector4i scissor = context->scissor.in; GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().upld(m_vt.m_max.p.floor())) + GSVector4i(0, 0, 1, 1); // right/bottom should be exclusive so +1 @@ -485,12 +485,12 @@ void GSRendererSW::Draw() { int n = GSUtil::GetVertexCount(PRIM->PRIM); - for (u32 i = 0, j = 0; i < m_index.tail; i += n, j++) + for (u32 i = 0, j = 0; i < m_index->tail; i += n, j++) { for (int k = 0; k < n; k++) { - GSVertex* v = 
&m_vertex.buff[m_index.buff[i + k]]; - GSVertex* vn = &m_vertex.buff[m_index.buff[i + n - 1]]; + GSVertex* v = &m_vertex->buff[m_index->buff[i + k]]; + GSVertex* vn = &m_vertex->buff[m_index->buff[i + n - 1]]; fprintf(s_fp, "%d:%d %f %f %f %f\n", j, k, @@ -1505,12 +1505,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) u32 ofx = context->XYOFFSET.OFX; - for (int i = 0, j = m_vertex.tail; i < j; i++) + for (int i = 0, j = m_vertex->tail; i < j; i++) { #if _M_SSE >= 0x501 - if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 + if ((((m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 #else - if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 + if ((((m_vertex->buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 #endif { gd.sel.notest = 0;