From d15d1e817a9a54523a75cbb56c53d62394a7f806 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sun, 11 Oct 2020 01:13:16 +1000
Subject: [PATCH] wip

---
 src/core/cpu_code_cache.cpp                | 68 ++++++++++++++++++++++
 src/core/cpu_code_cache.h                  |  2 +
 src/core/cpu_core.cpp                      | 12 ++++
 src/core/cpu_core_private.h                |  1 +
 src/core/cpu_recompiler_code_generator.cpp |  4 ++
 src/core/cpu_types.cpp                     | 59 +++++++++++++++++++
 src/core/cpu_types.h                       |  3 +
 7 files changed, 149 insertions(+)

diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp
index 727f449d8..946a7bbad 100644
--- a/src/core/cpu_code_cache.cpp
+++ b/src/core/cpu_code_cache.cpp
@@ -168,6 +168,9 @@ static void ExecuteImpl()
         CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
 
         InterpretCachedBlock(*block);
+        // Blocks flagged as idle loops consume the remaining ticks up to the downcount.
+        if (block->is_idle_loop)
+          IdleSkip();
 
         if (g_state.pending_ticks >= g_state.downcount)
           break;
@@ -387,6 +390,67 @@ recompile:
   return true;
 }
 
+// Returns true when the code at pc matches the known idle-loop pattern idle_1. Every entry is
+// compared as (bits & mask) == instruction; FollowBranch entries resume matching at the branch
+// target, and both branch entry kinds must pass CPU::IsFollowableBranchInstruction().
+// NOTE(review): the delay slot after a FollowBranch entry is never compared - confirm intended.
+static bool IsIdleLoop(u32 pc)
+{
+  enum InstructionType
+  {
+    Match,
+    FollowBranch,
+    BranchThreshold
+  };
+  struct InstructionAndMask
+  {
+    InstructionType type;
+    u32 instruction;
+    u32 mask;
+  };
+
+  // Furthest a branch may reach, in instructions, for it to still be followed.
+  static constexpr u32 MAX_BRANCH_DISTANCE = 128;
+
+  static constexpr InstructionAndMask idle_1[] = {
+    {InstructionType::Match, 0x8fa20000, 0xFFFF0000},           // lw v0, 16(sp)
+    {InstructionType::Match, 0x00000000, 0xFFFFFFFF},           // sll $zero, $zero, 0
+    {InstructionType::Match, 0x2442ffff, 0xFFFFFFFF},           // addiu v0, v0, ffff
+    {InstructionType::Match, 0xafa20000, 0xFFFF0000},           // sw v0, 16(sp)
+    {InstructionType::Match, 0x8fa20000, 0xFFFF0000},           // lw v0, 16(sp)
+    {InstructionType::Match, 0x00000000, 0xFFFFFFFF},           // sll $zero, $zero, 0
+    {InstructionType::FollowBranch, 0x14430000, 0xFFFF0000},    // bne v0, v1, 8003228c
+    {InstructionType::Match, 0x3c020000, 0xFFFF0000},           // lui v0, 8007
+    {InstructionType::Match, 0x8c420000, 0xFFFF0000},           // lw v0, -23808(v0)
+    {InstructionType::Match, 0x00000000, 0xFFFFFFFF},           // sll $zero, $zero, 0
+    {InstructionType::Match, 0x0044102a, 0xFFFFFFFF},           // slt v0, v0, a0
+    {InstructionType::BranchThreshold, 0x14400000, 0xFFFF0000}, // bne v0, $zero, 80032244
+  };
+
+  u32 current_pc = pc;
+  for (const InstructionAndMask& im : idle_1)
+  {
+    Instruction instruction;
+    if (!SafeReadInstruction(current_pc, &instruction.bits) || (instruction.bits & im.mask) != im.instruction)
+      return false;
+
+    // A branch that cannot be followed is a mismatch; bail out here instead of leaving the
+    // loop early and still reporting the block as an idle loop.
+    if (im.type != InstructionType::Match &&
+        !CPU::IsFollowableBranchInstruction(instruction, current_pc, pc, MAX_BRANCH_DISTANCE))
+    {
+      return false;
+    }
+
+    if (im.type == InstructionType::FollowBranch)
+      current_pc = CPU::GetBranchInstructionTarget(instruction, current_pc);
+    else
+      current_pc += sizeof(u32);
+  }
+
+  return true;
+}
+
 bool CompileBlock(CodeBlock* block)
 {
   u32 pc = block->GetPC();
@@ -398,6 +462,10 @@ bool CompileBlock(CodeBlock* block)
     __debugbreak();
 #endif
 
+  block->is_idle_loop = IsIdleLoop(pc);
+  if (block->is_idle_loop)
+    Log_InfoPrintf("Idle loop found at 0x%08X", pc);
+
   u32 last_cache_line = ICACHE_LINES;
 
   for (;;)
diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h
index 068e6706e..b5e59af08 100644
--- a/src/core/cpu_code_cache.h
+++ b/src/core/cpu_code_cache.h
@@ -64,6 +64,8 @@ struct CodeBlock
   TickCount uncached_fetch_ticks = 0;
   u32 icache_line_count = 0;
   bool invalidated = false;
+  // Set by CompileBlock() when the block start matches a known idle-loop pattern.
+  bool is_idle_loop = false;
 
   const u32 GetPC() const { return key.GetPC(); }
   const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp
index 1f48ceb06..06a84b62f 100644
--- a/src/core/cpu_core.cpp
+++ b/src/core/cpu_core.cpp
@@ -237,6 +237,18 @@ void ClearExternalInterrupt(u8 bit)
   g_state.cop0_regs.cause.Ip &= static_cast<u8>(~(1u << bit));
 }
 
+// Advances pending_ticks up to the current downcount; does nothing if it was already reached.
+// Called after executing a block that was flagged as an idle loop.
+void IdleSkip()
+{
+  const TickCount ticks_to_skip = g_state.downcount - g_state.pending_ticks;
+  if (ticks_to_skip <= 0)
+    return;
+
+  Log_DebugPrintf("Skipping %d ticks", ticks_to_skip);
+  g_state.pending_ticks += ticks_to_skip;
+}
+
 ALWAYS_INLINE_RELEASE static void UpdateLoadDelay()
 {
   // the old value is needed in case the delay slot instruction overwrites the same register
diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h
index 9f74fd7f0..181f1fb64 100644
--- a/src/core/cpu_core_private.h
+++ b/src/core/cpu_core_private.h
@@ -6,6 +6,7 @@ namespace CPU {
 // exceptions
 void RaiseException(Exception excode);
 void RaiseException(u32 CAUSE_bits, u32 EPC);
+void IdleSkip();
 
 ALWAYS_INLINE static bool HasPendingInterrupt()
 {
diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp
index 7f7a81bba..30f28e8d7 100644
--- a/src/core/cpu_recompiler_code_generator.cpp
+++ b/src/core/cpu_recompiler_code_generator.cpp
@@ -866,6 +866,10 @@ void CodeGenerator::BlockEpilogue()
     m_register_cache.WriteLoadDelayToCPU(true);
 
   AddPendingCycles(true);
+
+  // Idle-loop blocks call CPU::IdleSkip() once their own pending cycles have been added.
+  if (m_block->is_idle_loop)
+    EmitFunctionCall(nullptr, &CPU::IdleSkip);
 }
 
 void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles,
diff --git a/src/core/cpu_types.cpp b/src/core/cpu_types.cpp
index 3fe02b593..460e79110 100644
--- a/src/core/cpu_types.cpp
+++ b/src/core/cpu_types.cpp
@@ -44,6 +44,65 @@ bool IsBranchInstruction(const Instruction& instruction)
   }
 }
 
+// Returns true if the instruction is a b/beq/bgtz/blez/bne branch that may be followed: linking
+// forms of the b opcode are rejected, the offset may not exceed threshold_in_instructions in
+// either direction, and the branch target may not lie before block_pc.
+bool IsFollowableBranchInstruction(const Instruction& instruction, u32 instruction_pc, u32 block_pc,
+                                   u32 threshold_in_instructions)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::b:
+    {
+      // skip when linking
+      if ((static_cast<u8>(instruction.i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
+        return false;
+    }
+    break;
+
+    case InstructionOp::beq:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+    case InstructionOp::bne:
+      break;
+
+    default:
+      return false;
+  }
+
+  // the immediate is an offset in instructions, so it compares directly against the threshold
+  const s32 offset = static_cast<s32>(instruction.i.imm_sext32());
+  if (static_cast<u32>(std::abs(offset)) > threshold_in_instructions)
+    return false;
+
+  // we can't branch before the start of the block...
+  const u32 branch_target = GetBranchInstructionTarget(instruction, instruction_pc);
+  return (branch_target >= block_pc);
+}
+
+// Computes the target address of a jump or branch located at instruction_pc. j/jal combine the
+// upper four bits of the following instruction's address with the 26-bit target; relative
+// branches add the shifted offset to instruction_pc + 4. Anything else returns instruction_pc.
+u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::j:
+    case InstructionOp::jal:
+      return ((instruction_pc + 4) & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
+
+    case InstructionOp::b:
+    case InstructionOp::beq:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+    case InstructionOp::bne:
+      return instruction_pc + 4 + (instruction.i.imm_sext32() << 2);
+
+    default:
+      return instruction_pc;
+  }
+}
+
 bool IsMemoryLoadInstruction(const Instruction& instruction)
 {
   switch (instruction.op)
diff --git a/src/core/cpu_types.h b/src/core/cpu_types.h
index 785c746fb..3c25a35a9 100644
--- a/src/core/cpu_types.h
+++ b/src/core/cpu_types.h
@@ -218,6 +218,9 @@ bool InstructionHasLoadDelay(const Instruction& instruction);
 bool IsExitBlockInstruction(const Instruction& instruction);
 bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode);
 bool IsInvalidInstruction(const Instruction& instruction);
+bool IsFollowableBranchInstruction(const Instruction& instruction, u32 instruction_pc, u32 block_pc,
+                                   u32 threshold_in_instructions);
+u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc);
 
 struct Registers
 {