Merge pull request #1988 from magumagu/fast-checkgatherpipe

Use faster CheckGatherPipe from JIT.
This commit is contained in:
magumagu 2015-02-21 15:07:59 -08:00
commit 9e29154b08
6 changed files with 76 additions and 64 deletions

View File

@ -55,37 +55,50 @@ void ResetGatherPipe()
m_gatherPipeCount = 0;
}
static void UpdateGatherPipe()
{
u32 cnt;
u8* curMem = Memory::GetPointer(ProcessorInterface::Fifo_CPUWritePointer);
for (cnt = 0; m_gatherPipeCount >= GATHER_PIPE_SIZE; cnt += GATHER_PIPE_SIZE)
{
// copy the GatherPipe
memcpy(curMem, m_gatherPipe + cnt, GATHER_PIPE_SIZE);
m_gatherPipeCount -= GATHER_PIPE_SIZE;
// increase the CPUWritePointer
if (ProcessorInterface::Fifo_CPUWritePointer == ProcessorInterface::Fifo_CPUEnd)
{
ProcessorInterface::Fifo_CPUWritePointer = ProcessorInterface::Fifo_CPUBase;
curMem = Memory::GetPointer(ProcessorInterface::Fifo_CPUWritePointer);
}
else
{
curMem += GATHER_PIPE_SIZE;
ProcessorInterface::Fifo_CPUWritePointer += GATHER_PIPE_SIZE;
}
g_video_backend->Video_GatherPipeBursted();
}
// move back the spill bytes
memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount);
}
// Flushes the gather pipe when at least one full burst is buffered.
// Unlike CheckGatherPipe(), this performs no JIT profiling call.
void FastCheckGatherPipe()
{
	// Nothing to do until a complete burst has accumulated.
	if (m_gatherPipeCount < GATHER_PIPE_SIZE)
		return;

	UpdateGatherPipe();
}
// Flushes the gather pipe when at least one full burst is buffered and then
// reports the FIFO write to the JIT via JitInterface::CompileExceptionCheck.
//
// The flush itself is delegated to UpdateGatherPipe() and must happen exactly
// once per call: running the inline copy loop in addition to the helper would
// flush bursts twice with stale offsets and a stale destination pointer.
void CheckGatherPipe()
{
	if (m_gatherPipeCount >= GATHER_PIPE_SIZE)
	{
		UpdateGatherPipe();

		// Profile where slow FIFO writes are occurring.
		JitInterface::CompileExceptionCheck(JitInterface::ExceptionType::EXCEPTIONS_FIFO_WRITE);
	}
}

View File

@ -27,6 +27,7 @@ void DoState(PointerWrap &p);
// ResetGatherPipe: sets the pending gather pipe byte count back to zero.
void ResetGatherPipe();
// CheckGatherPipe: when at least GATHER_PIPE_SIZE bytes are buffered, flushes
// the gather pipe and calls JitInterface::CompileExceptionCheck for FIFO-write
// profiling.
void CheckGatherPipe();
// FastCheckGatherPipe: same flush condition as CheckGatherPipe, but without
// the JIT profiling call.
void FastCheckGatherPipe();
// NOTE(review): definition not visible here; presumably reports whether the
// gather pipe has no pending bytes -- confirm against the implementation.
bool IsEmpty();

View File

@ -296,7 +296,7 @@ bool Jit64::Cleanup()
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_CallFunction((void *)&GPFifo::FastCheckGatherPipe);
ABI_PopRegistersAndAdjustStack({}, 0);
did_something = true;
}
@ -662,14 +662,44 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.isLastInstruction = true;
}
// Gather pipe writes using a non-immediate address are discovered by profiling.
bool gatherPipeIntCheck = jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end();
// Gather pipe writes using an immediate address are explicitly tracked.
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_CallFunction((void *)&GPFifo::FastCheckGatherPipe);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
gatherPipeIntCheck = true;
}
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (gatherPipeIntCheck)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch extException = J_CC(CC_NZ, true);
SwitchToFarCode();
SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, true);
TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
WriteExternalExceptionExit();
SwitchToNearCode();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
}
u32 function = HLE::GetFunctionIndex(ops[i].address);
@ -715,33 +745,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.firstFPInstructionFound = true;
}
// Add an external exception check if the instruction writes to the FIFO.
if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end())
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
FixupBranch clearInt = J_CC(CC_NZ);
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch extException = J_CC(CC_NZ, true);
SwitchToFarCode();
SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, true);
TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
WriteExternalExceptionExit();
SwitchToNearCode();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
SetJumpTarget(clearInt);
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i].address) && GetState() != CPU_STEPPING)
{
// Turn off block linking if there are breakpoints so that the Step Over command does not link this block.

View File

@ -355,11 +355,6 @@ static void ImHere()
void JitIL::Cleanup()
{
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0.Hex || MMCR1.Hex)
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);

View File

@ -119,7 +119,7 @@ void JitArm::Cleanup()
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
PUSH(4, R0, R1, R2, R3);
QuickCallFunction(R14, (void*)&GPFifo::CheckGatherPipe);
QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
POP(4, R0, R1, R2, R3);
}
}
@ -461,7 +461,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
{
js.fifoBytesThisBlock -= 32;
PUSH(4, R0, R1, R2, R3);
QuickCallFunction(R14, (void*)&GPFifo::CheckGatherPipe);
QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
POP(4, R0, R1, R2, R3);
}

View File

@ -287,7 +287,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
regs_in_use[W30] = 0;
ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::CheckGatherPipe);
MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
BLR(X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(W30);