mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-01-31 18:53:03 +00:00
Merge pull request #1213 from unknownbrackets/dlist-cycles
Try to match GPU cycles to the CPU
This commit is contained in:
commit
dea37e5521
@ -315,6 +315,52 @@ s64 UnscheduleEvent(int event_type, u64 userdata)
|
||||
return result;
|
||||
}
|
||||
|
||||
// Removes every queued threadsafe event whose type and userdata both match.
// Returns (event time - globalTimer) of the last match that was removed, or 0
// when nothing matched. The external event lock is held for the whole walk.
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata)
{
	s64 remaining = 0;
	std::lock_guard<std::recursive_mutex> guard(externalEventSection);

	// Phase 1: strip matching events off the front of the list, moving
	// tsFirst forward past each one that gets freed.
	while (tsFirst && tsFirst->type == event_type && tsFirst->userdata == userdata)
	{
		remaining = tsFirst->time - globalTimer;

		Event *following = tsFirst->next;
		FreeTsEvent(tsFirst);
		tsFirst = following;
	}

	// Phase 2: the head (if any is left) is known not to match, so it can
	// serve as the first "keeper" while the rest of the list is filtered.
	if (tsFirst)
	{
		Event *keeper = tsFirst;
		while (Event *candidate = keeper->next)
		{
			if (candidate->type != event_type || candidate->userdata != userdata)
			{
				// Not ours: it becomes the new predecessor.
				keeper = candidate;
				continue;
			}

			remaining = candidate->time - globalTimer;

			// Unlink before freeing; the next pass re-reads keeper->next.
			keeper->next = candidate->next;
			FreeTsEvent(candidate);
		}
	}

	return remaining;
}
|
||||
|
||||
// Warning: not included in save state.
|
||||
void RegisterAdvanceCallback(void (*callback)(int cyclesExecuted))
|
||||
{
|
||||
|
@ -94,6 +94,7 @@ namespace CoreTiming
|
||||
void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata=0);
|
||||
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata=0);
|
||||
s64 UnscheduleEvent(int event_type, u64 userdata);
|
||||
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata);
|
||||
|
||||
void RemoveEvent(int event_type);
|
||||
void RemoveThreadsafeEvent(int event_type);
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "../MIPS/MIPS.h"
|
||||
#include "../System.h"
|
||||
#include "../CoreParameter.h"
|
||||
#include "../CoreTiming.h"
|
||||
#include "../Reporting.h"
|
||||
#include "sceGe.h"
|
||||
#include "sceKernelMemory.h"
|
||||
@ -37,6 +38,8 @@ struct GeInterruptData
|
||||
};
|
||||
|
||||
static std::list<GeInterruptData> ge_pending_cb;
|
||||
static int geSyncEvent;
|
||||
static int geInterruptEvent;
|
||||
|
||||
class GeIntrHandler : public IntrHandler
|
||||
{
|
||||
@ -100,7 +103,8 @@ public:
|
||||
ge_pending_cb.pop_front();
|
||||
gpu->InterruptEnd(intrdata.listid);
|
||||
|
||||
WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid);
|
||||
if (subintr >= 0)
|
||||
WARN_LOG(HLE, "Ignoring interrupt for display list %d, already been released.", intrdata.listid);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -137,11 +141,34 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// CoreTiming callback for geSyncEvent.
// userdata carries the wait id in its upper 32 bits and the WaitType in its
// lower 32 bits; cyclesLate is not used.
void __GeExecuteSync(u64 userdata, int cyclesLate)
{
	const int listid = static_cast<int>(userdata >> 32);
	const WaitType waitType = static_cast<WaitType>(userdata & 0xFFFFFFFF);

	// Trigger the wait first, then report to the GPU whether any thread woke.
	const bool wokeThreads = __KernelTriggerWait(waitType, listid, 0, "GeSync", true);
	gpu->SyncEnd(waitType, listid, wokeThreads);
}
|
||||
|
||||
// CoreTiming callback for geInterruptEvent.
// userdata carries the display list id in its upper 32 bits and the list pc
// in its lower 32 bits; cyclesLate is not used.
void __GeExecuteInterrupt(u64 userdata, int cyclesLate)
{
	const int listid = static_cast<int>(userdata >> 32);
	const u32 pc = static_cast<u32>(userdata & 0xFFFFFFFF);

	// Queue the interrupt details before raising the GE interrupt.
	GeInterruptData pending;
	pending.listid = listid;
	pending.pc = pc;
	ge_pending_cb.push_back(pending);

	__TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_GE_INTR, PSP_INTR_SUB_NONE);
}
|
||||
|
||||
void __GeInit()
|
||||
{
|
||||
memset(&ge_used_callbacks, 0, sizeof(ge_used_callbacks));
|
||||
ge_pending_cb.clear();
|
||||
__RegisterIntrHandler(PSP_GE_INTR, new GeIntrHandler());
|
||||
|
||||
geSyncEvent = CoreTiming::RegisterEvent("GeSyncEvent", &__GeExecuteSync);
|
||||
geInterruptEvent = CoreTiming::RegisterEvent("GeInterruptEvent", &__GeExecuteInterrupt);
|
||||
}
|
||||
|
||||
void __GeDoState(PointerWrap &p)
|
||||
@ -149,6 +176,12 @@ void __GeDoState(PointerWrap &p)
|
||||
p.DoArray(ge_callback_data, ARRAY_SIZE(ge_callback_data));
|
||||
p.DoArray(ge_used_callbacks, ARRAY_SIZE(ge_used_callbacks));
|
||||
p.Do(ge_pending_cb);
|
||||
|
||||
p.Do(geSyncEvent);
|
||||
CoreTiming::RestoreRegisterEvent(geSyncEvent, "GeSyncEvent", &__GeExecuteSync);
|
||||
p.Do(geInterruptEvent);
|
||||
CoreTiming::RestoreRegisterEvent(geInterruptEvent, "GeInterruptEvent", &__GeExecuteInterrupt);
|
||||
|
||||
// Everything else is done in sceDisplay.
|
||||
p.DoMarker("sceGe");
|
||||
}
|
||||
@ -158,19 +191,26 @@ void __GeShutdown()
|
||||
|
||||
}
|
||||
|
||||
bool __GeTriggerInterrupt(int listid, u32 pc)
|
||||
// Warning: may be called from the GPU thread.
|
||||
// Schedules geSyncEvent so that waiters of the given type/id are triggered
// once the emulated clock reaches atTicks. Always returns true.
//
// waitType: wait type to trigger; stored in the low 32 bits of the userdata.
// id:       wait id (a list id for list syncs); stored in the high 32 bits.
// atTicks:  absolute tick count at which the sync should fire.
bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks)
{
	// Same packing that __GeExecuteSync unpacks.
	u64 userdata = (u64)id << 32 | (u64) waitType;
	s64 future = atTicks - CoreTiming::GetTicks();
	if (waitType == WAITTYPE_GEDRAWSYNC)
	{
		// Replace any draw sync that is already pending, keeping whichever
		// delay is longer so the rescheduled event never fires earlier.
		// NOTE(review): presumably UnscheduleEvent returns the removed
		// event's remaining cycles (0 if none) - confirm.
		s64 left = CoreTiming::UnscheduleEvent(geSyncEvent, userdata);
		if (left > future)
			future = left;
	}
	CoreTiming::ScheduleEvent(future, geSyncEvent, userdata);
	return true;
}
|
||||
|
||||
GeInterruptData intrdata;
|
||||
intrdata.listid = listid;
|
||||
intrdata.pc = pc;
|
||||
ge_pending_cb.push_back(intrdata);
|
||||
__TriggerInterrupt(PSP_INTR_HLE, PSP_GE_INTR, PSP_INTR_SUB_NONE);
|
||||
// Warning: may be called from the GPU thread.
|
||||
bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks)
|
||||
{
|
||||
u64 userdata = (u64)listid << 32 | (u64) pc;
|
||||
CoreTiming::ScheduleEvent(atTicks - CoreTiming::GetTicks(), geInterruptEvent, userdata);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/HLE/sceKernelThread.h"
|
||||
|
||||
#define SCE_GE_LIST_COMPLETED 0
|
||||
#define SCE_GE_LIST_QUEUED 1
|
||||
#define SCE_GE_LIST_DRAWING 2
|
||||
@ -39,7 +41,8 @@ void Register_sceGe_user();
|
||||
void __GeInit();
|
||||
void __GeDoState(PointerWrap &p);
|
||||
void __GeShutdown();
|
||||
bool __GeTriggerInterrupt(int listid, u32 pc);
|
||||
bool __GeTriggerSync(WaitType waitType, int id, u64 atTicks);
|
||||
bool __GeTriggerInterrupt(int listid, u32 pc, u64 atTicks);
|
||||
bool __GeHasPendingInterrupt();
|
||||
|
||||
|
||||
|
@ -334,7 +334,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
|
||||
break;
|
||||
}
|
||||
|
||||
cyclesExecuted += 10 * count;
|
||||
// Rough estimate, not sure what's correct.
|
||||
cyclesExecuted += 80 * count;
|
||||
|
||||
// TODO: Split this so that we can collect sequences of primitives, can greatly speed things up
|
||||
// on platforms where draw calls are expensive like mobile and D3D
|
||||
|
@ -16,14 +16,14 @@
|
||||
GPUCommon::GPUCommon() :
|
||||
currentList(NULL),
|
||||
isbreak(false),
|
||||
drawComplete(true),
|
||||
drawCompleteTicks(0),
|
||||
dumpNextFrame_(false),
|
||||
dumpThisFrame_(false),
|
||||
interruptsEnabled_(true)
|
||||
{
|
||||
for (int i = 0; i < DisplayListMaxCount; ++i) {
|
||||
dls[i].state = PSP_GE_DL_STATE_NONE;
|
||||
dls[i].shouldWait = false;
|
||||
dls[i].waitTicks = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -47,7 +47,7 @@ u32 GPUCommon::DrawSync(int mode) {
|
||||
|
||||
if (mode == 0) {
|
||||
// TODO: What if dispatch / interrupts disabled?
|
||||
if (!drawComplete) {
|
||||
if (drawCompleteTicks > CoreTiming::GetTicks()) {
|
||||
__KernelWaitCurThread(WAITTYPE_GEDRAWSYNC, 1, 0, 0, false, "GeDrawSync");
|
||||
} else {
|
||||
for (int i = 0; i < DisplayListMaxCount; ++i) {
|
||||
@ -116,7 +116,7 @@ int GPUCommon::ListSync(int listid, int mode)
|
||||
}
|
||||
}
|
||||
|
||||
if (dl.shouldWait) {
|
||||
if (dl.waitTicks > CoreTiming::GetTicks()) {
|
||||
__KernelWaitCurThread(WAITTYPE_GELISTSYNC, listid, 0, 0, false, "GeListSync");
|
||||
}
|
||||
return PSP_GE_LIST_COMPLETED;
|
||||
@ -139,6 +139,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
|
||||
oldCompatibility = false;
|
||||
}
|
||||
|
||||
u64 currentTicks = CoreTiming::GetTicks();
|
||||
for (int i = 0; i < DisplayListMaxCount; ++i)
|
||||
{
|
||||
if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
|
||||
@ -157,7 +158,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
|
||||
id = i;
|
||||
break;
|
||||
}
|
||||
if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED)
|
||||
if (id < 0 && dls[i].state == PSP_GE_DL_STATE_COMPLETED && dls[i].waitTicks < currentTicks)
|
||||
{
|
||||
id = i;
|
||||
}
|
||||
@ -181,7 +182,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
|
||||
dl.stackptr = 0;
|
||||
dl.signal = PSP_GE_SIGNAL_NONE;
|
||||
dl.interrupted = false;
|
||||
dl.shouldWait = true;
|
||||
dl.waitTicks = (u64)-1;
|
||||
|
||||
if (head) {
|
||||
if (currentList) {
|
||||
@ -202,7 +203,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head)
|
||||
currentList = &dl;
|
||||
dlQueue.push_front(id);
|
||||
|
||||
drawComplete = false;
|
||||
drawCompleteTicks = (u64)-1;
|
||||
|
||||
// TODO save context when starting the list if param is set
|
||||
ProcessDLQueue();
|
||||
@ -226,7 +227,7 @@ u32 GPUCommon::DequeueList(int listid)
|
||||
else
|
||||
dlQueue.remove(listid);
|
||||
|
||||
dls[listid].shouldWait = false;
|
||||
dls[listid].waitTicks = 0;
|
||||
__KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync");
|
||||
|
||||
CheckDrawSync();
|
||||
@ -265,7 +266,7 @@ u32 GPUCommon::Continue()
|
||||
// TODO Restore BASE
|
||||
|
||||
// We have a list now, so it's not complete.
|
||||
drawComplete = false;
|
||||
drawCompleteTicks = (u64)-1;
|
||||
}
|
||||
else
|
||||
currentList->state = PSP_GE_DL_STATE_QUEUED;
|
||||
@ -425,7 +426,8 @@ bool GPUCommon::InterpretList(DisplayList &list)
|
||||
|
||||
// Adds the cost of the display list instructions between cycleLastPC and pc
// to cyclesExecuted, then moves cycleLastPC forward.
//
// pc:    address execution has reached.
// newPC: address counting should restart from; 0 means restart from pc.
inline void GPUCommon::UpdateCycles(u32 pc, u32 newPC)
{
	// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
	// The byte distance is divided by 4 to get an instruction count.
	cyclesExecuted += 2 * (pc - cycleLastPC) / 4;
	cycleLastPC = newPC == 0 ? pc : newPC;
}
|
||||
|
||||
@ -453,15 +455,8 @@ bool GPUCommon::ProcessDLQueue()
|
||||
}
|
||||
currentList = NULL;
|
||||
|
||||
drawComplete = true;
|
||||
if (__KernelTriggerWait(WAITTYPE_GEDRAWSYNC, 1, 0, "GeDrawSync"))
|
||||
{
|
||||
for (int i = 0; i < DisplayListMaxCount; ++i) {
|
||||
if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
|
||||
dls[i].state = PSP_GE_DL_STATE_NONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
drawCompleteTicks = startingTicks + cyclesExecuted;
|
||||
__GeTriggerSync(WAITTYPE_GEDRAWSYNC, 1, drawCompleteTicks);
|
||||
|
||||
return true; //no more lists!
|
||||
}
|
||||
@ -622,7 +617,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
}
|
||||
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
|
||||
if (interruptsEnabled_ && trigger) {
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc))
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted))
|
||||
gpuState = GPUSTATE_INTERRUPT;
|
||||
}
|
||||
}
|
||||
@ -631,7 +626,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
switch (currentList->signal) {
|
||||
case PSP_GE_SIGNAL_HANDLER_PAUSE:
|
||||
if (interruptsEnabled_) {
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc))
|
||||
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted))
|
||||
gpuState = GPUSTATE_INTERRUPT;
|
||||
}
|
||||
break;
|
||||
@ -645,9 +640,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
currentList->subIntrToken = prev & 0xFFFF;
|
||||
currentList->state = PSP_GE_DL_STATE_COMPLETED;
|
||||
gpuState = GPUSTATE_DONE;
|
||||
if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc)) {
|
||||
currentList->shouldWait = false;
|
||||
__KernelTriggerWait(WAITTYPE_GELISTSYNC, currentList->id, 0, "GeListSync", true);
|
||||
if (!interruptsEnabled_ || !__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
|
||||
currentList->waitTicks = startingTicks + cyclesExecuted;
|
||||
__GeTriggerSync(WAITTYPE_GELISTSYNC, currentList->id, currentList->waitTicks);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -682,7 +677,7 @@ void GPUCommon::DoState(PointerWrap &p) {
|
||||
p.Do(prev);
|
||||
p.Do(gpuState);
|
||||
p.Do(isbreak);
|
||||
p.Do(drawComplete);
|
||||
p.Do(drawCompleteTicks);
|
||||
p.DoMarker("GPUCommon");
|
||||
}
|
||||
|
||||
@ -697,10 +692,23 @@ void GPUCommon::InterruptEnd(int listid)
|
||||
|
||||
DisplayList &dl = dls[listid];
|
||||
// TODO: Unless the signal handler could change it?
|
||||
if (dl.state == PSP_GE_DL_STATE_COMPLETED) {
|
||||
dl.shouldWait = false;
|
||||
if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) {
|
||||
dl.waitTicks = 0;
|
||||
__KernelTriggerWait(WAITTYPE_GELISTSYNC, listid, 0, "GeListSync", true);
|
||||
}
|
||||
|
||||
ProcessDLQueue();
|
||||
}
|
||||
|
||||
// TODO: Maybe cleaner to keep this in GE and trigger the clear directly?
|
||||
void GPUCommon::SyncEnd(WaitType waitType, int listid, bool wokeThreads)
{
	// Only a draw sync that actually woke a thread changes any list state;
	// listid is not consulted here.
	if (waitType != WAITTYPE_GEDRAWSYNC || !wokeThreads)
		return;

	// Move every completed display list back to the NONE state.
	for (int slot = 0; slot != DisplayListMaxCount; ++slot)
	{
		DisplayList &entry = dls[slot];
		if (entry.state == PSP_GE_DL_STATE_COMPLETED)
			entry.state = PSP_GE_DL_STATE_NONE;
	}
}
|
||||
|
@ -10,6 +10,7 @@ public:
|
||||
|
||||
virtual void InterruptStart(int listid);
|
||||
virtual void InterruptEnd(int listid);
|
||||
virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads);
|
||||
virtual void EnableInterrupts(bool enable) {
|
||||
interruptsEnabled_ = enable;
|
||||
}
|
||||
@ -43,7 +44,7 @@ protected:
|
||||
u32 prev;
|
||||
GPUState gpuState;
|
||||
bool isbreak;
|
||||
bool drawComplete;
|
||||
u64 drawCompleteTicks;
|
||||
|
||||
u64 startingTicks;
|
||||
u32 cycleLastPC;
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "GPUState.h"
|
||||
#include <list>
|
||||
|
||||
enum WaitType;
|
||||
class PointerWrap;
|
||||
|
||||
enum DisplayListStatus
|
||||
@ -121,7 +122,7 @@ struct DisplayList
|
||||
u32 stack[32];
|
||||
int stackptr;
|
||||
bool interrupted;
|
||||
bool shouldWait;
|
||||
u64 waitTicks;
|
||||
};
|
||||
|
||||
class GPUInterface
|
||||
@ -147,6 +148,7 @@ public:
|
||||
|
||||
virtual void InterruptStart(int listid) = 0;
|
||||
virtual void InterruptEnd(int listid) = 0;
|
||||
virtual void SyncEnd(WaitType waitType, int listid, bool wokeThreads) = 0;
|
||||
|
||||
virtual void PreExecuteOp(u32 op, u32 diff) = 0;
|
||||
virtual void ExecuteOp(u32 op, u32 diff) = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user