Merge pull request #1213 from unknownbrackets/dlist-cycles

Try to match GPU cycles to the CPU
This commit is contained in:
Henrik Rydgård 2013-04-08 00:43:33 -07:00
commit dea37e5521
8 changed files with 145 additions and 43 deletions

View File

@ -315,6 +315,52 @@ s64 UnscheduleEvent(int event_type, u64 userdata)
return result;
}
// Removes every queued threadsafe event whose type and userdata both match
// the arguments, holding externalEventSection for the whole operation.
//
// Returns (event time - globalTimer) for the last matching event encountered
// while walking the list from tsFirst, or 0 when no event matched.
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata)
{
	std::lock_guard<std::recursive_mutex> lk(externalEventSection);
	s64 remaining = 0;

	// Phase 1: peel matching events off the front of the list, so that
	// tsFirst ends up either null or pointing at an event that is kept.
	while (tsFirst && tsFirst->type == event_type && tsFirst->userdata == userdata)
	{
		Event *const following = tsFirst->next;
		remaining = tsFirst->time - globalTimer;
		FreeTsEvent(tsFirst);
		tsFirst = following;
	}
	if (!tsFirst)
		return remaining;

	// Phase 2: walk the rest of the list. 'kept' is always the most recent
	// event that stays, so unlinking a match only needs kept->next patched.
	Event *kept = tsFirst;
	for (Event *cur = kept->next; cur; cur = kept->next)
	{
		if (cur->type != event_type || cur->userdata != userdata)
		{
			// Not a match: it becomes the new predecessor.
			kept = cur;
			continue;
		}
		remaining = cur->time - globalTimer;
		kept->next = cur->next;
		FreeTsEvent(cur);
	}
	return remaining;
}
// Warning: not included in save state.
void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted))
{

View File

@ -94,6 +94,7 @@ namespace CoreTiming
void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata=0);
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata=0);
s64 UnscheduleEvent(int event_type, u64 userdata);
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata);
void RemoveEvent(int event_type);
void RemoveThreadsafeEvent(int event_type);

View File

@ -19,6 +19,7 @@
#include "../MIPS/MIPS.h"
#include "../System.h"
#include "../CoreParameter.h"
#include "../CoreTiming.h"
#include "../Reporting.h"
#include "sceGe.h"
#include "sceKernelMemory.h"
@ -37,6 +38,8 @@ struct GeInterruptData
};
static std::list<GeInterruptData> ge_pending_cb;
static int geSyncEvent;
static int geInterruptEvent;
class GeIntrHandler : public IntrHandler
{
@ -100,7 +103,8 @@ public:
ge_pending_cb.pop_front();
gpu->InterruptEnd(intrdata.listid);
WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid);
if (subintr >= 0)
WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid);
return false;
}
@ -137,11 +141,34 @@ public:
}
};
// Timing-event callback registered under the name "GeSyncEvent".
// userdata holds the list id in its upper 32 bits and the WaitType in its
// lower 32 bits. Triggers the matching kernel wait and then reports to the
// GPU whether any thread was woken. cyclesLate is not used.
void __GeExecuteSync(u64 userdata, int cyclesLate)
{
	const WaitType syncType = static_cast<WaitType>(userdata & 0xFFFFFFFF);
	const int id = userdata >> 32;

	const bool anyWoken = __KernelTriggerWait(syncType, id, 0, "GeSync", true);
	gpu->SyncEnd(syncType, id, anyWoken);
}
// Timing-event callback registered under the name "GeInterruptEvent".
// userdata holds the list id in its upper 32 bits and the list pc in its
// lower 32 bits. Queues that pair on ge_pending_cb and then raises the GE
// interrupt. cyclesLate is not used.
void __GeExecuteInterrupt(u64 userdata, int cyclesLate)
{
	GeInterruptData pending;
	pending.listid = static_cast<int>(userdata >> 32);
	pending.pc = static_cast<u32>(userdata & 0xFFFFFFFF);
	ge_pending_cb.push_back(pending);

	__TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_GE_INTR, PSP_INTR_SUB_NONE);
}
// Initialises the GE HLE module state: clears callback bookkeeping, installs
// the GE interrupt handler and registers the two GE timing events.
void __GeInit()
{
// Zero ge_used_callbacks and drop any interrupt data still queued.
memset(&ge_used_callbacks, 0, sizeof(ge_used_callbacks));
ge_pending_cb.clear();
// NOTE(review): the handler is heap-allocated here and never freed in this
// function; presumably the interrupt manager takes ownership - confirm.
__RegisterIntrHandler(PSP_GE_INTR, new GeIntrHandler());
// Keep the returned event ids; they are what gets passed to
// CoreTiming::ScheduleEvent when a sync or interrupt is queued.
geSyncEvent = CoreTiming::RegisterEvent("GeSyncEvent", &__GeExecuteSync);
geInterruptEvent = CoreTiming::RegisterEvent("GeInterruptEvent", &__GeExecuteInterrupt);
}
void __GeDoState(PointerWrap &p)
@ -149,6 +176,12 @@ void __GeDoState(PointerWrap &p)
p.DoArray(ge_callback_data, ARRAY_SIZE(ge_callback_data));
p.DoArray(ge_used_callbacks, ARRAY_SIZE(ge_used_callbacks));
p.Do(ge_pending_cb);
p.Do(geSyncEvent);
CoreTiming::RestoreRegisterEvent(geSyncEvent, "GeSyncEvent", &__GeExecuteSync);
p.Do(geInterruptEvent);
CoreTiming::RestoreRegisterEvent(geInterruptEvent, "GeInterruptEvent", &__GeExecuteInterrupt);
// Everything else is done in sceDisplay.
p.DoMarker("sceGe");
}
@ -158,19 +191,26 @@ void __GeShutdown()
}
bool __GeTriggerInterrupt(int listid, u32 pc)
// Warning: may be called from the GPU thread.
bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks)
{
// ClaDun X2 does not expect sceGeListEnqueue to reschedule (which it does not on the PSP.)
// Once PPSSPP's GPU uses cycles, we can remove this check.
DisplayList* dl = gpu->getList(listid);
if (dl != NULL && dl->subIntrBase < 0)
return false;
u64 userdata = (u64)id << 32 | (u64) waitType;
s64 future = atTicks - CoreTiming::GetTicks();
if (waitType == WAITTYPE_GEDRAWSYNC)
{
s64 left = CoreTiming::UnscheduleEvent(geSyncEvent, userdata);
if (left > future)
future = left;
}
CoreTiming::ScheduleEvent(future, geSyncEvent, userdata);
return true;
}
GeInterruptData intrdata;
intrdata.listid = listid;
intrdata.pc = pc;
ge_pending_cb.push_back(intrdata);
__TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE);
// Warning: may be called from the GPU thread.
bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks)
{
u64 userdata = (u64)listid << 32 | (u64) pc;
CoreTiming::ScheduleEvent(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata);
return true;
}

View File

@ -17,6 +17,8 @@
#pragma once
#include "Core/HLE/sceKernelThread.h"
#define SCE_GE_LIST_COMPLETED 0
#define SCE_GE_LIST_QUEUED 1
#define SCE_GE_LIST_DRAWING 2
@ -39,7 +41,8 @@ void Register_sceGe_user();
void __GeInit();
void __GeDoState(PointerWrap &p);
void __GeShutdown();
bool __GeTriggerInterrupt(int listid, u32 pc);
bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks);
bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks);
bool __GeHasPendingInterrupt();

View File

@ -334,7 +334,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
break;
}
cyclesExecuted += 10 * count;
// Rough estimate, not sure what's correct.
cyclesExecuted += 80 * count;
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
// on platforms where draw calls are expensive like mobile and D3D

View File

@ -16,14 +16,14 @@
GPUCommon::GPUCommon() :
currentList(NULL),
isbreak(false),
drawComplete(true),
drawCompleteTicks(0),
dumpNextFrame_(false),
dumpThisFrame_(false),
interruptsEnabled_(true)
{
for (int i = 0; i < DisplayListMaxCount; ++i) {
dls[i].state = PSP_GE_DL_STATE_NONE;
dls[i].shouldWait = false;
dls[i].waitTicks = 0;
}
}
@ -47,7 +47,7 @@ u32 GPUCommon::DrawSync(int mode) {
if (mode == 0) {
// TODO: What if dispatch / interrupts disabled?
if (!drawComplete) {
if (drawCompleteTicks > CoreTiming::GetTicks()) {
__KernelWaitCurThread(WAITTYPE_GEDRAWSYNC, 1, 0, 0, false, "GeDrawSync");
} else {
for (int i = 0; i < DisplayListMaxCount; ++i) {
@ -116,7 +116,7 @@ int GPUCommon::ListSync(int listid, int mode)
}
}
if (dl.shouldWait) {
if (dl.waitTicks > CoreTiming::GetTicks()) {
__KernelWaitCurThread(WAITTYPE_GELISTSYNC, listid, 0, 0, false, "GeListSync");
}
return PSP_GE_LIST_COMPLETED;
@ -139,6 +139,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
oldCompatibility = false;
}
u64 currentTicks = CoreTiming::GetTicks();
for (int i = 0; i < DisplayListMaxCount; ++i)
{
if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
@ -157,7 +158,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
id = i;
break;
}
if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED)
if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED && dls[i].waitTicks < currentTicks)
{
id = i;
}
@ -181,7 +182,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
dl.stackptr = 0;
dl.signal = PSP_GE_SIGNAL_NONE;
dl.interrupted = false;
dl.shouldWait = true;
dl.waitTicks = (u64)-1;
if (head) {
if (currentList) {
@ -202,7 +203,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
currentList = &dl;
dlQueue.push_front(id);
drawComplete = false;
drawCompleteTicks = (u64)-1;
// TODO save context when starting the list if param is set
ProcessDLQueue();
@ -226,7 +227,7 @@ u32 GPUCommon::DequeueList(int listid)
else
dlQueue.remove(listid);
dls[listid].shouldWait = false;
dls[listid].waitTicks = 0;
__KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync");
CheckDrawSync();
@ -265,7 +266,7 @@ u32 GPUCommon::Continue()
// TODO Restore BASE
// We have a list now, so it's not complete.
drawComplete = false;
drawCompleteTicks = (u64)-1;
}
else
currentList->state = PSP_GE_DL_STATE_QUEUED;
@ -425,7 +426,8 @@ bool GPUCommon::InterpretList(DisplayList &list)
inline void GPUCommon::UpdateCycles(u32 pc, u32 newPC)
{
cyclesExecuted += (pc - cycleLastPC) / 4;
// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
cyclesExecuted += 2 * (pc - cycleLastPC) / 4;
cycleLastPC = newPC == 0 ? pc : newPC;
}
@ -453,15 +455,8 @@ bool GPUCommon::ProcessDLQueue()
}
currentList = NULL;
drawComplete = true;
if (__KernelTriggerWait(WAITTYPE_GEDRAWSYNC, 1, 0, "GeDrawSync"))
{
for (int i = 0; i < DisplayListMaxCount; ++i) {
if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
dls[i].state = PSP_GE_DL_STATE_NONE;
}
}
}
drawCompleteTicks = startingTicks + cyclesExecuted;
__GeTriggerSync(WAITTYPE_GEDRAWSYNC, 1, drawCompleteTicks);
return true; //no more lists!
}
@ -622,7 +617,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
}
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
if (interruptsEnabled_ && trigger) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc))
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted))
gpuState = GPUSTATE_INTERRUPT;
}
}
@ -631,7 +626,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
switch (currentList->signal) {
case PSP_GE_SIGNAL_HANDLER_PAUSE:
if (interruptsEnabled_) {
if (__GeTriggerInterrupt(currentList->id, currentList->pc))
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted))
gpuState = GPUSTATE_INTERRUPT;
}
break;
@ -645,9 +640,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
currentList->subIntrToken = prev & 0xFFFF;
currentList->state = PSP_GE_DL_STATE_COMPLETED;
gpuState = GPUSTATE_DONE;
if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc)) {
currentList->shouldWait = false;
__KernelTriggerWait(WAITTYPE_GELISTSYNC, currentList->id, 0, "GeListSync", true);
if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
currentList->waitTicks = startingTicks + cyclesExecuted;
__GeTriggerSync(WAITTYPE_GELISTSYNC, currentList->id, currentList->waitTicks);
}
break;
}
@ -682,7 +677,7 @@ void GPUCommon::DoState(PointerWrap &p) {
p.Do(prev);
p.Do(gpuState);
p.Do(isbreak);
p.Do(drawComplete);
p.Do(drawCompleteTicks);
p.DoMarker("GPUCommon");
}
@ -697,10 +692,23 @@ void GPUCommon::InterruptEnd(int listid)
DisplayList &dl = dls[listid];
// TODO: Unless the signal handler could change it?
if (dl.state == PSP_GE_DL_STATE_COMPLETED) {
dl.shouldWait = false;
if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) {
dl.waitTicks = 0;
__KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync", true);
}
ProcessDLQueue();
}
// TODO: Maybe cleaner to keep this in GE and trigger the clear directly?
//
// Follow-up to a GE sync wait being triggered. When the wait was a draw sync
// and it actually woke threads, every display list currently in the COMPLETED
// state is reset to NONE. In all other cases nothing happens. listid is not
// used.
void GPUCommon::SyncEnd(WaitType waitType, int listid, bool wokeThreads)
{
	// Only a draw sync that woke at least one thread clears finished lists.
	if (!wokeThreads || waitType != WAITTYPE_GEDRAWSYNC)
		return;

	for (int slot = 0; slot < DisplayListMaxCount; ++slot) {
		DisplayList &entry = dls[slot];
		if (entry.state == PSP_GE_DL_STATE_COMPLETED)
			entry.state = PSP_GE_DL_STATE_NONE;
	}
}

View File

@ -10,6 +10,7 @@ public:
virtual void InterruptStart(int listid);
virtual void InterruptEnd(int listid);
virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads);
virtual void EnableInterrupts(bool enable) {
interruptsEnabled_ = enable;
}
@ -43,7 +44,7 @@ protected:
u32 prev;
GPUState gpuState;
bool isbreak;
bool drawComplete;
u64 drawCompleteTicks;
u64 startingTicks;
u32 cycleLastPC;

View File

@ -21,6 +21,7 @@
#include "GPUState.h"
#include <list>
enum WaitType;
class PointerWrap;
enum DisplayListStatus
@ -121,7 +122,7 @@ struct DisplayList
u32 stack[32];
int stackptr;
bool interrupted;
bool shouldWait;
u64 waitTicks;
};
class GPUInterface
@ -147,6 +148,7 @@ public:
virtual void InterruptStart(int listid) = 0;
virtual void InterruptEnd(int listid) = 0;
virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads) = 0;
virtual void PreExecuteOp(u32 op, u32 diff) = 0;
virtual void ExecuteOp(u32 op, u32 diff) = 0;