DMA: Better enforce CPU runtime during linked list

and get rid of the hack for Newman Haas.
Stenzek 2024-04-10 20:00:09 +10:00
parent f63f5d829d
commit 417bf0c3bc
7 changed files with 105 additions and 125 deletions


@@ -90470,7 +90470,7 @@ SLPS-02376:
 - DigitalController
 settings:
   dmaMaxSliceTicks: 100
-  dmaHaltTicks: 150
+  dmaHaltTicks: 200
 codes:
 - SLPS-02376
 - SLPS-02356


@@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
 static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
 static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
-static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
+static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
 
 struct ChannelState
 {
@@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count
 
 template<Channel channel>
 static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
+static TickCount GetMaxSliceTicks();
 
 // configuration
 static TickCount s_max_slice_ticks = 1000;
@@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
   }
 }
 
+TickCount DMA::GetMaxSliceTicks()
+{
+  const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
+  if (!TimingEvents::IsRunningEvents())
+    return max;
+
+  const u32 current_ticks = TimingEvents::GetGlobalTickCounter();
+  const u32 max_ticks = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(max);
+  return std::clamp(static_cast<TickCount>(max_ticks - current_ticks), 0, max);
+}
+
 template<DMA::Channel channel>
 bool DMA::TransferChannel()
 {
@@ -586,35 +597,13 @@ bool DMA::TransferChannel()
     return true;
   }
 
-  if constexpr (channel == Channel::GPU)
-  {
-    // Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
-    // controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
-    // in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
-    // are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
-    // insufficient for avoiding the race, probably due to the linked list layout.
-    //
-    // Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
-    // penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
-    // significantly affecting timing, we accumulate the ticks that have been "lost", and allow them to be
-    // "used up" when the transfer does happen.
-    //
-    if (Pad::IsTransmitting())
-    {
-      Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
-      if (!s_unhalt_event->IsActive())
-        s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
-      return false;
-    }
-  }
-
   Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
 
   // Prove to the compiler that nothing's going to modify these.
   const u8* const ram_ptr = Bus::g_ram;
   const u32 mask = Bus::g_ram_mask;
 
-  const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
+  const TickCount slice_ticks = GetMaxSliceTicks();
   TickCount remaining_ticks = slice_ticks;
   while (cs.request && remaining_ticks > 0)
   {
@@ -658,9 +647,6 @@ bool DMA::TransferChannel()
   cs.base_address = current_address;
   if (cs.request)
   {
-    // don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
-    s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
-
     // stall the transfer for a bit if we ran for too long
     HaltTransfer(s_halt_ticks);
     return false;
@@ -681,7 +667,7 @@ bool DMA::TransferChannel()
   const u32 block_size = cs.block_control.request.GetBlockSize();
   u32 blocks_remaining = cs.block_control.request.GetBlockCount();
-  TickCount ticks_remaining = s_max_slice_ticks;
+  TickCount ticks_remaining = GetMaxSliceTicks();
 
   if (copy_to_device)
   {

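Note: the clamp in GetMaxSliceTicks() is the heart of the change. A linked-list slice can no longer run more than one slice length past the tick the current event run will reach, so the CPU is guaranteed to regain control between slices. A minimal standalone sketch of the arithmetic, with made-up tick values (MaxSlice is a stand-in for DMA::GetMaxSliceTicks, not code from the repository):

#include <algorithm>
#include <cstdint>
#include <cstdio>

using TickCount = int32_t;

// Stand-in for DMA::GetMaxSliceTicks(): `current` mirrors
// TimingEvents::GetGlobalTickCounter(), `run_end` mirrors
// TimingEvents::GetEventRunTickCounter(), `max` is the slice budget.
TickCount MaxSlice(uint32_t current, uint32_t run_end, TickCount max)
{
  const uint32_t max_ticks = run_end + static_cast<uint32_t>(max);
  return std::clamp(static_cast<TickCount>(max_ticks - current), 0, max);
}

int main()
{
  std::printf("%d\n", MaxSlice(5000, 5150, 1000)); // 1000: full slice fits
  std::printf("%d\n", MaxSlice(5200, 5150, 1000)); // 950: already 50 ticks past the run target
  std::printf("%d\n", MaxSlice(6200, 5150, 1000)); // 0: budget exhausted, CPU must run first
}

When the pad is transmitting, `max` shrinks to SLICE_SIZE_WHEN_TRANSMITTING_PAD (10 ticks), which replaces the old approach of halting the DMA outright.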

@@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
     case 0x00:
       m_fifo.Push(value);
       ExecuteCommands();
-      UpdateCommandTickEvent();
       return;
 
     case 0x04:
@@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
 
 void GPU::EndDMAWrite()
 {
-  m_fifo_pushed = true;
-  if (!m_syncing)
-  {
-    ExecuteCommands();
-    UpdateCommandTickEvent();
-  }
-  else
-  {
-    UpdateDMARequest();
-  }
+  ExecuteCommands();
 }
 
 /**
@@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
 
 void GPU::CommandTickEvent(TickCount ticks)
 {
   m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
   m_command_tick_event->Deactivate();
 
-  // we can be syncing if this came from a DMA write. recursively executing commands would be bad.
-  if (!m_syncing)
-    ExecuteCommands();
-
-  UpdateGPUIdle();
-
-  if (m_pending_command_ticks <= 0)
-    m_pending_command_ticks = 0;
-  else
-    m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+  m_executing_commands = true;
+  ExecuteCommands();
+  UpdateCommandTickEvent();
+  m_executing_commands = false;
 }
 
 void GPU::UpdateCommandTickEvent()
 {
   if (m_pending_command_ticks <= 0)
+  {
+    m_pending_command_ticks = 0;
     m_command_tick_event->Deactivate();
-  else if (!m_command_tick_event->IsActive())
+  }
+  else
+  {
     m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+  }
 }
 
 void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
@@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
 
       // end of transfer, catch up on any commands which were written (unlikely)
       ExecuteCommands();
-      UpdateCommandTickEvent();
       break;
     }
   }

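The GPU-side effect of the rework: every entry point (register write, DMA write, tick event) now funnels through ExecuteCommands(), and UpdateCommandTickEvent() alone decides whether the command event stays scheduled, clamping a negative tick balance to zero so a caught-up GPU never reschedules with a stale interval. A self-contained mock of that contract; MockEvent and the free function below are illustrative stand-ins, not DuckStation's TimingEvent API:

#include <cassert>

using TickCount = int;

// Illustrative stand-in for the scheduling half of a timing event.
struct MockEvent
{
  bool active = false;
  TickCount interval = 0;
  void Deactivate() { active = false; }
  void SetIntervalAndSchedule(TickCount t) { interval = t; active = true; }
};

// Mirrors the new UpdateCommandTickEvent(): deactivate when caught up,
// otherwise (re)schedule for the remaining ticks.
void UpdateCommandTickEvent(TickCount& pending_ticks, MockEvent& event)
{
  if (pending_ticks <= 0)
  {
    pending_ticks = 0; // clamp: don't carry a negative balance forward
    event.Deactivate();
  }
  else
  {
    event.SetIntervalAndSchedule(pending_ticks);
  }
}

int main()
{
  MockEvent ev;
  TickCount pending = -25; // GPU finished early
  UpdateCommandTickEvent(pending, ev);
  assert(pending == 0 && !ev.active);

  pending = 40; // work outstanding: event fires again in 40 ticks
  UpdateCommandTickEvent(pending, ev);
  assert(ev.active && ev.interval == 40);
}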

@@ -307,6 +307,7 @@ protected:
   void WriteGP1(u32 value);
   void EndCommand();
   void ExecuteCommands();
+  void TryExecuteCommands();
   void HandleGetGPUInfoCommand(u32 value);
 
   // Rendering in the backend
@@ -542,8 +543,7 @@ protected:
   u32 m_GPUREAD_latch = 0;
 
   /// True if currently executing/syncing.
-  bool m_syncing = false;
-  bool m_fifo_pushed = false;
+  bool m_executing_commands = false;
 
   struct VRAMTransfer
   {


@@ -25,94 +25,93 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
   return value == 0 ? value_for_zero : value;
 }
 
-void GPU::ExecuteCommands()
-{
-  m_syncing = true;
-
-  for (;;)
-  {
-    if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
-    {
-      switch (m_blitter_state)
-      {
-        case BlitterState::Idle:
-        {
-          const u32 command = FifoPeek(0) >> 24;
-          if ((this->*s_GP0_command_handler_table[command])())
-            continue;
-          else
-            goto batch_done;
-        }
-
-        case BlitterState::WritingVRAM:
-        {
-          DebugAssert(m_blit_remaining_words > 0);
-          const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
-          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-          for (u32 i = 0; i < words_to_copy; i++)
-            m_blit_buffer.push_back(FifoPop());
-          m_blit_remaining_words -= words_to_copy;
-          Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
-          if (m_blit_remaining_words == 0)
-            FinishVRAMWrite();
-          continue;
-        }
-
-        case BlitterState::ReadingVRAM:
-        {
-          goto batch_done;
-        }
-        break;
-
-        case BlitterState::DrawingPolyLine:
-        {
-          const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
-          u32 terminator_index =
-            m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
-          for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
-          {
-            // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
-            // terminator is on the first word for the vertex
-            if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
-              break;
-          }
-
-          const bool found_terminator = (terminator_index < m_fifo.GetSize());
-          const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
-          if (words_to_copy > 0)
-          {
-            m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-            for (u32 i = 0; i < words_to_copy; i++)
-              m_blit_buffer.push_back(FifoPop());
-          }
-
-          Log_DebugPrintf("Added %u words to polyline", words_to_copy);
-          if (found_terminator)
-          {
-            // drop terminator
-            m_fifo.RemoveOne();
-            Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
-            DispatchRenderCommand();
-            m_blit_buffer.clear();
-            EndCommand();
-            continue;
-          }
-        }
-        break;
-      }
-    }
-
-  batch_done:
-    m_fifo_pushed = false;
-    UpdateDMARequest();
-    if (!m_fifo_pushed)
-      break;
-  }
-
-  UpdateGPUIdle();
-  m_syncing = false;
-}
+void GPU::TryExecuteCommands()
+{
+  while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
+  {
+    switch (m_blitter_state)
+    {
+      case BlitterState::Idle:
+      {
+        const u32 command = FifoPeek(0) >> 24;
+        if ((this->*s_GP0_command_handler_table[command])())
+          continue;
+        else
+          return;
+      }
+
+      case BlitterState::WritingVRAM:
+      {
+        DebugAssert(m_blit_remaining_words > 0);
+        const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
+        m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+        for (u32 i = 0; i < words_to_copy; i++)
+          m_blit_buffer.push_back(FifoPop());
+        m_blit_remaining_words -= words_to_copy;
+        Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
+        if (m_blit_remaining_words == 0)
+          FinishVRAMWrite();
+        continue;
+      }
+
+      case BlitterState::ReadingVRAM:
+      {
+        return;
+      }
+      break;
+
+      case BlitterState::DrawingPolyLine:
+      {
+        const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
+        u32 terminator_index =
+          m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
+        for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
+        {
+          // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
+          // terminator is on the first word for the vertex
+          if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
+            break;
+        }
+
+        const bool found_terminator = (terminator_index < m_fifo.GetSize());
+        const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
+        if (words_to_copy > 0)
+        {
+          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+          for (u32 i = 0; i < words_to_copy; i++)
+            m_blit_buffer.push_back(FifoPop());
+        }
+
+        Log_DebugPrintf("Added %u words to polyline", words_to_copy);
+        if (found_terminator)
+        {
+          // drop terminator
+          m_fifo.RemoveOne();
+          Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
+          DispatchRenderCommand();
+          m_blit_buffer.clear();
+          EndCommand();
+          continue;
+        }
+      }
+      break;
+    }
+  }
+}
+
+void GPU::ExecuteCommands()
+{
+  const bool was_executing_from_event = std::exchange(m_executing_commands, true);
+  TryExecuteCommands();
+  UpdateDMARequest();
+  UpdateGPUIdle();
+  m_executing_commands = was_executing_from_event;
+  if (!was_executing_from_event)
+    UpdateCommandTickEvent();
+}
 
 void GPU::EndCommand()

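The split leaves TryExecuteCommands() as a pure FIFO-draining loop, while ExecuteCommands() owns the side effects (DMA request, idle flag, event scheduling). The std::exchange dance is a conventional re-entrancy guard: nested calls, say a DMA write landing while a command executes, still do the work but leave event scheduling to the outermost caller. A reduced sketch of the pattern under those assumptions (names are illustrative, not the real GPU class):

#include <cstdio>
#include <utility>

static bool s_executing = false;

static void UpdateCommandTickEvent() { std::puts("reschedule command event"); }

static void DrainFifo(int depth);

// Guarded entry point: nested calls run the work but leave event
// scheduling to the outermost caller, so the event is touched exactly once.
static void ExecuteCommands(int depth = 0)
{
  const bool was_executing = std::exchange(s_executing, true);
  DrainFifo(depth);
  s_executing = was_executing;
  if (!was_executing)
    UpdateCommandTickEvent();
}

static void DrainFifo(int depth)
{
  // A command that triggers a DMA write can re-enter ExecuteCommands().
  if (depth == 0)
    ExecuteCommands(depth + 1);
}

int main()
{
  ExecuteCommands(); // prints "reschedule command event" exactly once
}

CommandTickEvent() leans on the same flag: by setting m_executing_commands before calling ExecuteCommands(), it suppresses the wrapper's rescheduling and performs its own UpdateCommandTickEvent() afterwards.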

@@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
 static TimingEvent* s_current_event = nullptr;
 static u32 s_active_event_count = 0;
 static u32 s_global_tick_counter = 0;
+static u32 s_event_run_tick_counter = 0;
 static bool s_frame_done = false;
 
 u32 GetGlobalTickCounter()
@@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
   return s_global_tick_counter;
 }
 
+u32 GetEventRunTickCounter()
+{
+  return s_event_run_tick_counter;
+}
+
 void Initialize()
 {
   Reset();
@@ -293,6 +299,7 @@ void RunEvents()
   if (pending_ticks >= s_active_events_head->GetDowncount())
   {
     CPU::ResetPendingTicks();
+    s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
 
     do
     {

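GetEventRunTickCounter() therefore reports the tick this event run will have advanced to once it completes, latched before any callback fires. A compressed sketch of that ordering, with free functions standing in for the real RunEvents()/DMA interplay:

#include <cstdint>

static uint32_t s_global_tick_counter = 0;
static uint32_t s_event_run_tick_counter = 0;

// Condensed shape of RunEvents(): latch the run target first, then let the
// events (which may kick off DMA slices) advance the global counter toward it.
static void RunEventsSketch(uint32_t pending_ticks)
{
  s_event_run_tick_counter = s_global_tick_counter + pending_ticks;
  while (s_global_tick_counter < s_event_run_tick_counter)
  {
    // ... dispatch the next due event; a DMA event fired here calls
    // GetMaxSliceTicks(), which reads both counters to size its slice ...
    s_global_tick_counter++; // stand-in for advancing by event downcounts
  }
}

int main()
{
  RunEventsSketch(150); // run target = 150; any DMA slice started inside
                        // the loop is bounded relative to that target
}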

@@ -81,6 +81,7 @@ public:
 namespace TimingEvents {
 
 u32 GetGlobalTickCounter();
+u32 GetEventRunTickCounter();
 
 void Initialize();
 void Reset();