DMA: Better enforce CPU runtime during linked list
and get rid of the hack for Newman Haas.
parent f63f5d829d
commit 417bf0c3bc
@@ -90470,7 +90470,7 @@ SLPS-02376:
   - DigitalController
   settings:
     dmaMaxSliceTicks: 100
-    dmaHaltTicks: 150
+    dmaHaltTicks: 200
   codes:
   - SLPS-02376
   - SLPS-02356
@@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
 
 static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
 static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
-static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
+static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
 
 struct ChannelState
 {
@@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_cou
 template<Channel channel>
 static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
 
+static TickCount GetMaxSliceTicks();
 
 // configuration
 static TickCount s_max_slice_ticks = 1000;
@@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
   }
 }
 
+TickCount DMA::GetMaxSliceTicks()
+{
+  const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
+  if (!TimingEvents::IsRunningEvents())
+    return max;
+
+  const u32 current_ticks = TimingEvents::GetGlobalTickCounter();
+  const u32 max_ticks = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(max);
+  return std::clamp(static_cast<TickCount>(max_ticks - current_ticks), 0, max);
+}
+
 template<DMA::Channel channel>
 bool DMA::TransferChannel()
 {
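GetMaxSliceTicks() is the heart of the change: each DMA slice is capped so it cannot run past the point where the current timing-event batch hands control back to the CPU, and during a pad transfer the cap drops to SLICE_SIZE_WHEN_TRANSMITTING_PAD. The subtraction is done in u32, so a window that has already elapsed wraps around; the cast back to signed TickCount and the clamp turn that into zero. Below is a minimal standalone sketch of the arithmetic; ClampSlice and the sample tick values are illustrative, not emulator code.

#include <algorithm>
#include <cstdint>
#include <cstdio>

using TickCount = int32_t;

// Bound a DMA slice to the remaining event window: 'event_run_ticks' is the
// tick at which the current event batch began running, 'max' is the nominal
// slice size. Wrapping u32 arithmetic mirrors the emulator's counters.
TickCount ClampSlice(uint32_t global_ticks, uint32_t event_run_ticks, TickCount max)
{
  const uint32_t max_ticks = event_run_ticks + static_cast<uint32_t>(max);
  // wraps to a huge value (negative as signed) if the window already passed
  return std::clamp(static_cast<TickCount>(max_ticks - global_ticks), 0, max);
}

int main()
{
  // window opened at tick 1000 with a 100-tick budget; 40 ticks consumed
  std::printf("%d\n", ClampSlice(1040, 1000, 100)); // 60
  // window fully elapsed: wraps in u32, clamps to 0
  std::printf("%d\n", ClampSlice(1200, 1000, 100)); // 0
  return 0;
}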
@@ -586,35 +597,13 @@ bool DMA::TransferChannel()
     return true;
   }
 
-  if constexpr (channel == Channel::GPU)
-  {
-    // Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
-    // controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
-    // in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
-    // are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
-    // insufficient for avoiding the race, probably due to the linked list layout.
-    //
-    // Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
-    // penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
-    // significantly affecting timing, we accumulate the ticks that have been "lost", and allow them to be
-    // "used up" when the transfer does happen.
-    //
-    if (Pad::IsTransmitting())
-    {
-      Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
-      if (!s_unhalt_event->IsActive())
-        s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
-      return false;
-    }
-  }
-
   Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
 
   // Prove to the compiler that nothing's going to modify these.
   const u8* const ram_ptr = Bus::g_ram;
   const u32 mask = Bus::g_ram_mask;
 
-  const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
+  const TickCount slice_ticks = GetMaxSliceTicks();
   TickCount remaining_ticks = slice_ticks;
   while (cs.request && remaining_ticks > 0)
   {
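The deleted comment documents the old workaround: halt linked-list DMA entirely while the pad is transmitting, buffering the lost ticks in s_halt_ticks_remaining. The new code instead budgets every slice through GetMaxSliceTicks() and lets the while loop above stop once the budget is spent. Here is a toy version of such a budgeted walk, assuming the PS1 node layout (word count in the top byte, next-pointer in the low 24 bits); WalkLinkedList and its per-node costs are illustrative only, not emulator code.

#include <cstdint>
#include <cstdio>

using TickCount = int32_t;

// Illustrative per-node costs, mirroring the constants in the diff.
constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
constexpr uint32_t LINKED_LIST_TERMINATOR = 0x00FFFFFFu;

// Toy walk: header word = (word count << 24) | next address. Stops at the
// terminator or when the tick budget runs out, returning the address to
// resume from. Hypothetical helper for illustration.
uint32_t WalkLinkedList(const uint32_t* ram, uint32_t addr, TickCount budget)
{
  while (addr != LINKED_LIST_TERMINATOR && budget > 0)
  {
    const uint32_t header = ram[addr / 4];
    const uint32_t word_count = header >> 24;
    budget -= LINKED_LIST_HEADER_READ_TICKS + LINKED_LIST_BLOCK_SETUP_TICKS +
              static_cast<TickCount>(word_count); // one tick per data word, simplified
    addr = header & 0x00FFFFFFu;
  }
  return addr;
}

int main()
{
  // two nodes: 2 words at 0x0 -> 1 word at 0x8 -> terminator
  const uint32_t ram[] = {(2u << 24) | 0x8u, 0u, (1u << 24) | LINKED_LIST_TERMINATOR, 0u};
  std::printf("stopped at 0x%06X\n", WalkLinkedList(ram, 0, 15));  // budget dies mid-list
  std::printf("stopped at 0x%06X\n", WalkLinkedList(ram, 0, 100)); // reaches terminator
  return 0;
}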
@@ -658,9 +647,6 @@ bool DMA::TransferChannel()
     cs.base_address = current_address;
     if (cs.request)
     {
-      // don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
-      s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
-
       // stall the transfer for a bit if we ran for too long
       HaltTransfer(s_halt_ticks);
      return false;
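What survives is the ordinary stall path: when a slice ends with the channel still requesting, HaltTransfer(s_halt_ticks) parks the transfer and the unhalt event resumes it later. A rough standalone sketch of that run/halt cadence with made-up numbers; the real resume goes through a timing event rather than a loop.

#include <algorithm>
#include <cstdio>

using TickCount = int;

// Simplified slice/halt cadence: run up to one slice of work, then stall
// before resuming. The slice and halt values here are illustrative.
int main()
{
  TickCount work_remaining = 2500; // ticks of DMA work queued
  const TickCount slice = 1000;    // stand-in for s_max_slice_ticks
  const TickCount halt = 100;      // stand-in for s_halt_ticks

  TickCount now = 0;
  while (work_remaining > 0)
  {
    const TickCount run = std::min(work_remaining, slice);
    work_remaining -= run;
    now += run;
    if (work_remaining > 0)
      now += halt; // HaltTransfer(): give the CPU time before resuming
  }
  std::printf("finished at tick %d\n", now); // 2500 work + 2*100 halts = 2700
  return 0;
}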
@@ -681,7 +667,7 @@ bool DMA::TransferChannel()
 
   const u32 block_size = cs.block_control.request.GetBlockSize();
   u32 blocks_remaining = cs.block_control.request.GetBlockCount();
-  TickCount ticks_remaining = s_max_slice_ticks;
+  TickCount ticks_remaining = GetMaxSliceTicks();
 
   if (copy_to_device)
   {
@@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
     case 0x00:
       m_fifo.Push(value);
       ExecuteCommands();
-      UpdateCommandTickEvent();
       return;
 
     case 0x04:
@@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
 
 void GPU::EndDMAWrite()
 {
-  m_fifo_pushed = true;
-  if (!m_syncing)
-  {
-    ExecuteCommands();
-    UpdateCommandTickEvent();
-  }
-  else
-  {
-    UpdateDMARequest();
-  }
+  ExecuteCommands();
 }
 
 /**
@@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
 void GPU::CommandTickEvent(TickCount ticks)
 {
   m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
   m_command_tick_event->Deactivate();
 
-  // we can be syncing if this came from a DMA write. recursively executing commands would be bad.
-  if (!m_syncing)
-    ExecuteCommands();
-
-  UpdateGPUIdle();
-
-  if (m_pending_command_ticks <= 0)
-    m_pending_command_ticks = 0;
-  else
-    m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+  m_executing_commands = true;
+  ExecuteCommands();
+  UpdateCommandTickEvent();
+  m_executing_commands = false;
 }
 
 void GPU::UpdateCommandTickEvent()
 {
   if (m_pending_command_ticks <= 0)
   {
     m_pending_command_ticks = 0;
     m_command_tick_event->Deactivate();
   }
-  else
+  else if (!m_command_tick_event->IsActive())
   {
     m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
   }
 }
 
 void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
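CommandTickEvent() now brackets command execution with m_executing_commands instead of consulting m_syncing, and the rewritten ExecuteCommands() further down in this diff uses std::exchange so that only the outermost caller reschedules the tick event. A minimal sketch of that guard pattern; CommandRunner and its recursion depth are contrived for illustration.

#include <cstdio>
#include <utility>

// std::exchange sets the flag and returns its previous value in one step, so
// only the outermost call (the one that found the flag clear) performs the
// final reschedule; nested re-entries skip it.
struct CommandRunner
{
  bool executing = false;
  int depth = 0;

  void Execute()
  {
    const bool was_executing = std::exchange(executing, true);

    if (++depth <= 2) // stand-in for command processing that can re-enter
      Execute();

    executing = was_executing;
    if (!was_executing)
      std::printf("outermost call: update the tick event here\n"); // printed once
  }
};

int main()
{
  CommandRunner runner;
  runner.Execute(); // nested calls skip the update; only the first prints
  return 0;
}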
@@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
 
         // end of transfer, catch up on any commands which were written (unlikely)
         ExecuteCommands();
-        UpdateCommandTickEvent();
         break;
       }
     }
@@ -307,6 +307,7 @@ protected:
   void WriteGP1(u32 value);
   void EndCommand();
   void ExecuteCommands();
+  void TryExecuteCommands();
   void HandleGetGPUInfoCommand(u32 value);
 
   // Rendering in the backend
@@ -542,8 +543,7 @@ protected:
   u32 m_GPUREAD_latch = 0;
 
   /// True if currently executing/syncing.
-  bool m_syncing = false;
-  bool m_fifo_pushed = false;
+  bool m_executing_commands = false;
 
   struct VRAMTransfer
   {
@@ -25,94 +25,93 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
   return value == 0 ? value_for_zero : value;
 }
 
-void GPU::ExecuteCommands()
-{
-  m_syncing = true;
-
-  for (;;)
-  {
-    if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
-    {
-      switch (m_blitter_state)
-      {
-        case BlitterState::Idle:
-        {
-          const u32 command = FifoPeek(0) >> 24;
-          if ((this->*s_GP0_command_handler_table[command])())
-            continue;
-          else
-            goto batch_done;
-        }
-
-        case BlitterState::WritingVRAM:
-        {
-          DebugAssert(m_blit_remaining_words > 0);
-          const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
-          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-          for (u32 i = 0; i < words_to_copy; i++)
-            m_blit_buffer.push_back(FifoPop());
-          m_blit_remaining_words -= words_to_copy;
-
-          Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
-          if (m_blit_remaining_words == 0)
-            FinishVRAMWrite();
-
-          continue;
-        }
-
-        case BlitterState::ReadingVRAM:
-        {
-          goto batch_done;
-        }
-        break;
-
-        case BlitterState::DrawingPolyLine:
-        {
-          const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
-          u32 terminator_index =
-            m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
-          for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
-          {
-            // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
-            // terminator is on the first word for the vertex
-            if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
-              break;
-          }
-
-          const bool found_terminator = (terminator_index < m_fifo.GetSize());
-          const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
-          if (words_to_copy > 0)
-          {
-            m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-            for (u32 i = 0; i < words_to_copy; i++)
-              m_blit_buffer.push_back(FifoPop());
-          }
-
-          Log_DebugPrintf("Added %u words to polyline", words_to_copy);
-          if (found_terminator)
-          {
-            // drop terminator
-            m_fifo.RemoveOne();
-            Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
-            DispatchRenderCommand();
-            m_blit_buffer.clear();
-            EndCommand();
-            continue;
-          }
-        }
-        break;
-      }
-    }
-
-  batch_done:
-    m_fifo_pushed = false;
-    UpdateDMARequest();
-    if (!m_fifo_pushed)
-      break;
-  }
-
-  UpdateGPUIdle();
-  m_syncing = false;
-}
+void GPU::TryExecuteCommands()
+{
+  while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
+  {
+    switch (m_blitter_state)
+    {
+      case BlitterState::Idle:
+      {
+        const u32 command = FifoPeek(0) >> 24;
+        if ((this->*s_GP0_command_handler_table[command])())
+          continue;
+        else
+          return;
+      }
+
+      case BlitterState::WritingVRAM:
+      {
+        DebugAssert(m_blit_remaining_words > 0);
+        const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
+        m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+        for (u32 i = 0; i < words_to_copy; i++)
+          m_blit_buffer.push_back(FifoPop());
+        m_blit_remaining_words -= words_to_copy;
+
+        Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
+        if (m_blit_remaining_words == 0)
+          FinishVRAMWrite();
+
+        continue;
+      }
+
+      case BlitterState::ReadingVRAM:
+      {
+        return;
+      }
+      break;
+
+      case BlitterState::DrawingPolyLine:
+      {
+        const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
+        u32 terminator_index =
+          m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
+        for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
+        {
+          // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
+          // terminator is on the first word for the vertex
+          if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
+            break;
+        }
+
+        const bool found_terminator = (terminator_index < m_fifo.GetSize());
+        const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
+        if (words_to_copy > 0)
+        {
+          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+          for (u32 i = 0; i < words_to_copy; i++)
+            m_blit_buffer.push_back(FifoPop());
+        }
+
+        Log_DebugPrintf("Added %u words to polyline", words_to_copy);
+        if (found_terminator)
+        {
+          // drop terminator
+          m_fifo.RemoveOne();
+          Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
+          DispatchRenderCommand();
+          m_blit_buffer.clear();
+          EndCommand();
+          continue;
+        }
+      }
+      break;
+    }
+  }
+}
+
+void GPU::ExecuteCommands()
+{
+  const bool was_executing_from_event = std::exchange(m_executing_commands, true);
+
+  TryExecuteCommands();
+  UpdateDMARequest();
+  UpdateGPUIdle();
+
+  m_executing_commands = was_executing_from_event;
+  if (!was_executing_from_event)
+    UpdateCommandTickEvent();
+}
 
 void GPU::EndCommand()
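The poly-line case scans the FIFO one vertex at a time: a vertex is one word, or two when shading_enable adds a colour word, and the terminator is any word matching (w & 0xF000F000) == 0x50005000 at a vertex boundary. A self-contained sketch of just that scan, over made-up FIFO contents.

#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  // Toy FIFO contents; real words come from GP0 writes.
  const std::vector<uint32_t> fifo = {
    0x00100010, // vertex 1
    0x00200020, // vertex 2
    0x55555555, // terminator: (w & 0xF000F000) == 0x50005000
  };
  const bool shaded = false; // two words per vertex when Gouraud-shaded
  const uint32_t words_per_vertex = shaded ? 2 : 1;

  // Step through vertex-sized strides; the terminator must sit where a
  // vertex's first word would be.
  uint32_t terminator_index = 0;
  for (; terminator_index < fifo.size(); terminator_index += words_per_vertex)
  {
    if ((fifo[terminator_index] & 0xF000F000u) == 0x50005000u)
      break;
  }

  const bool found = (terminator_index < fifo.size());
  std::printf("terminator %s at index %u\n", found ? "found" : "not found", terminator_index);
  return 0;
}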
@@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
 static TimingEvent* s_current_event = nullptr;
 static u32 s_active_event_count = 0;
 static u32 s_global_tick_counter = 0;
+static u32 s_event_run_tick_counter = 0;
 static bool s_frame_done = false;
 
 u32 GetGlobalTickCounter()
@@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
   return s_global_tick_counter;
 }
 
+u32 GetEventRunTickCounter()
+{
+  return s_event_run_tick_counter;
+}
+
 void Initialize()
 {
   Reset();
@@ -293,6 +299,7 @@ void RunEvents()
     if (pending_ticks >= s_active_events_head->GetDowncount())
     {
       CPU::ResetPendingTicks();
+      s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
 
       do
       {
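s_event_run_tick_counter records the tick the CPU had actually reached when the event batch began: the global counter plus whatever ticks the CPU had pending. DMA::GetMaxSliceTicks() measures its slice window from that snapshot. A tiny illustration of the bookkeeping; BeginRunEvents and the sample values are made up.

#include <cstdint>
#include <cstdio>

// Illustrative globals mirroring the counters touched by this commit.
static uint32_t s_global_tick_counter = 0;
static uint32_t s_event_run_tick_counter = 0;

// When events start running, snapshot where the CPU's pending ticks end;
// DMA later charges its slice budget against this point.
void BeginRunEvents(int32_t pending_ticks)
{
  s_event_run_tick_counter = s_global_tick_counter + static_cast<uint32_t>(pending_ticks);
}

int main()
{
  s_global_tick_counter = 5000;
  BeginRunEvents(120); // CPU had executed 120 ticks before the events fired
  std::printf("%u\n", s_event_run_tick_counter); // 5120
  return 0;
}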
@@ -81,6 +81,7 @@ public:
 namespace TimingEvents {
 
 u32 GetGlobalTickCounter();
+u32 GetEventRunTickCounter();
 
 void Initialize();
 void Reset();