mirror of
https://github.com/mupen64plus-ae/parallel-rsp.git
synced 2024-11-27 07:20:23 +00:00
JIT out some code, it's a start.
This commit is contained in:
parent
350ca7d46c
commit
c8bdf5023e
6
main.cpp
6
main.cpp
@ -30,6 +30,7 @@ static vector<uint32_t> read_binary(const char *path, bool flip)
|
||||
return v;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static bool read_tag_validate(FILE *file, const char *tag)
|
||||
{
|
||||
char tmp[9] = {};
|
||||
@ -289,10 +290,11 @@ static void validate_trace(RSP::CPU &cpu, const char *path)
|
||||
|
||||
fclose(file);
|
||||
}
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
RSP::CPU cpu;
|
||||
RSP::JIT::CPU cpu;
|
||||
auto &state = cpu.get_state();
|
||||
|
||||
uint32_t cr[16] = {};
|
||||
@ -318,8 +320,10 @@ int main(int argc, char *argv[])
|
||||
cpu.run();
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (argc == 2)
|
||||
validate_trace(cpu, argv[1]);
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
709
rsp_jit.cpp
709
rsp_jit.cpp
@ -1,12 +1,23 @@
|
||||
#include "rsp_jit.hpp"
|
||||
#include <utility>
|
||||
#include <assert.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// We're only guaranteed 3 V registers (x86).
|
||||
#define JIT_REGISTER_SELF JIT_V0
|
||||
#define JIT_REGISTER_STATE JIT_V1
|
||||
#define JIT_REGISTER_DMEM JIT_V2
|
||||
|
||||
#define JIT_REGISTER_MODE JIT_R1
|
||||
#define JIT_REGISTER_NEXT_PC JIT_R0
|
||||
|
||||
// Freely used to implement instructions.
|
||||
#define JIT_REGISTER_TMP0 JIT_R0
|
||||
#define JIT_REGISTER_TMP1 JIT_R1
|
||||
|
||||
// We're only guaranteed 3 R registers (x86).
|
||||
#define JIT_REGISTER_COND_BRANCH_TAKEN JIT_R(JIT_R_NUM - 1)
|
||||
#define JIT_FRAME_SIZE 256
|
||||
|
||||
namespace RSP
|
||||
@ -72,6 +83,9 @@ uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
|
||||
unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
|
||||
{
|
||||
// Scans through IMEM and finds the logical "end" of the instruction stream.
|
||||
// A logical end of the instruction stream is where execution must terminate.
|
||||
// If we have forward branches into this block, i.e. gotos, they extend the execution stream.
|
||||
// However, we cannot execute beyond end.
|
||||
unsigned max_static_pc = pc;
|
||||
unsigned count = end - pc;
|
||||
|
||||
@ -100,7 +114,8 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
|
||||
switch (instr & 63)
|
||||
{
|
||||
case 010:
|
||||
// JR always terminates either by returning or exiting.
|
||||
case 011:
|
||||
// JR and JALR always terminate execution of the block.
|
||||
// We execute the next instruction via delay slot and exit.
|
||||
// Unless we can branch past the JR
|
||||
// (max_static_pc will be higher than expected),
|
||||
@ -130,6 +145,7 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
|
||||
case 001: // BGEZ
|
||||
case 021: // BGEZAL
|
||||
case 020: // BLTZAL
|
||||
// TODO/Optimization: Handle static branch case where $0 is used.
|
||||
target = (pc + i + 1 + instr) & 0x3ff;
|
||||
if (target >= pc && target < end) // goto
|
||||
max_static_pc = max(max_static_pc, target + 1);
|
||||
@ -140,8 +156,9 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
|
||||
}
|
||||
break;
|
||||
|
||||
case 002:
|
||||
// J is resolved by goto.
|
||||
case 002: // J
|
||||
case 003: // JAL
|
||||
// J is resolved by goto. Same with JAL if call target happens to be inside the block.
|
||||
target = instr & 0x3ff;
|
||||
if (target >= pc && target < end) // goto
|
||||
{
|
||||
@ -169,6 +186,7 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
|
||||
case 005: // BNE
|
||||
case 006: // BLEZ
|
||||
case 007: // BGTZ
|
||||
// TODO/Optimization: Handle static branch case where $0 is used.
|
||||
target = (pc + i + 1 + instr) & 0x3ff;
|
||||
if (target >= pc && target < end) // goto
|
||||
max_static_pc = max(max_static_pc, target + 1);
|
||||
@ -207,6 +225,7 @@ void CPU::init_jit_thunks()
|
||||
jit_getarg(JIT_REGISTER_SELF, self);
|
||||
jit_getarg(JIT_REGISTER_STATE, state);
|
||||
jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
|
||||
jit_ldxi(JIT_REGISTER_DMEM, JIT_REGISTER_STATE, offsetof(CPUState, dmem));
|
||||
jit_movi(JIT_REGISTER_MODE, MODE_ENTER);
|
||||
|
||||
auto *entry_label = jit_indirect();
|
||||
@ -272,30 +291,641 @@ int CPU::enter(uint32_t pc)
|
||||
return thunks.enter_frame(this, &state);
|
||||
}
|
||||
|
||||
Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
|
||||
// Emits the epilogue used when execution falls off the end of a compiled block.
//
// pc is the value loaded into the next-PC register before control is handed
// to the enter thunk. last_info describes the final instruction of the block:
// if it was a branch, its delay slot has not been executed yet, so the branch
// target and a "delay slot pending" flag are written to CPUState for the next
// block to pick up after its first instruction.
void CPU::jit_end_of_block(jit_state_t *_jit, uint32_t pc, const CPU::InstructionInfo &last_info)
{
	// Label for the common tail; a conditional branch which was not taken jumps straight here.
	jit_node_t *tail = jit_forward();

	const bool pending_branch = last_info.branch;
	if (pending_branch)
	{
		if (last_info.conditional)
		{
			// Not taken: nothing to record, skip to the tail.
			jit_node_t *not_taken = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);
			jit_patch_at(not_taken, tail);
		}

		// Resolve the target: read it from the scalar register for indirect branches,
		// otherwise it is a compile-time constant.
		if (!last_info.indirect)
			jit_movi(JIT_REGISTER_TMP0, last_info.branch_target);
		else
			jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * last_info.branch_target);

		// Park the target in CPUState and raise the pending flag.
		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, JIT_REGISTER_TMP0);
		jit_movi(JIT_REGISTER_TMP0, 1);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_TMP0);
	}

	jit_link(tail);

	// Continue with whatever block starts at pc.
	jit_movi(JIT_REGISTER_NEXT_PC, pc);
	jit_node_t *to_next_block = jit_jmpi();
	jit_patch_abs(to_next_block, thunks.enter_thunk);
}
|
||||
|
||||
// Emits the control transfer for a branch whose delay slot instruction has
// just been emitted.
//
// last_info      - description of the branch instruction preceding the delay slot.
// local_targets  - one forward label per instruction of the block being compiled,
//                  indexed by (address - base_pc) >> 2.
// base_pc/end_pc - byte address range [base_pc, end_pc) covered by the block.
//
// Direct branches which land inside the block become plain jumps to the
// matching label. Everything else loads the target into the next-PC register
// and leaves through the enter thunk. For conditional branches the transfer
// only happens when JIT_REGISTER_COND_BRANCH_TAKEN is non-zero; otherwise
// execution falls through to the code emitted after this call.
void CPU::jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info,
                                jit_node_t **local_targets, uint32_t base_pc, uint32_t end_pc)
{
	const bool targets_this_block =
	    !last_info.indirect && last_info.branch_target >= base_pc && last_info.branch_target < end_pc;

	if (targets_this_block)
	{
		jit_node_t *label = local_targets[(last_info.branch_target - base_pc) >> 2];
		jit_node_t *jump;
		if (last_info.conditional)
			jump = jit_bnei(JIT_REGISTER_COND_BRANCH_TAKEN, 0);
		else
			jump = jit_jmpi();
		jit_patch_at(jump, label);
		return;
	}

	// Leaving the block. A conditional branch which was NOT taken must skip the exit sequence.
	// (This used to test "!= 0", which skipped the exit exactly when the branch was taken.)
	jit_node_t *not_taken = nullptr;
	if (last_info.conditional)
		not_taken = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);

	if (last_info.indirect)
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * last_info.branch_target);
	else
		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);

	if (not_taken)
		jit_patch(not_taken);
}
|
||||
|
||||
// Emits code which leaves JIT-compiled code through the return thunk.
//
// pc                - byte address of the instruction requesting the exit.
// last_info         - description of the previous instruction in the block; if it
//                     was a branch, the exiting instruction sits in its delay slot.
// mode              - value loaded into JIT_REGISTER_MODE for the return thunk.
// first_instruction - true when this is the first instruction of the block, in which
//                     case a delay slot may still be pending in CPUState from the
//                     previously executed block.
//
// On exit, JIT_REGISTER_NEXT_PC holds the address at which execution should resume:
// pc + 4 normally, or the branch target when a taken branch is being resolved.
void CPU::jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, ReturnMode mode, bool first_instruction)
{
	// Every path ends in the return thunk, so load the mode once up front.
	// The conditional-branch paths used to leave this register unset.
	// None of the code below uses JIT_REGISTER_MODE (aliased with TMP1) as a scratch register.
	jit_movi(JIT_REGISTER_MODE, mode);

	if (first_instruction)
	{
		// Need to consider that we need to move delay slot to PC.
		jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));

		auto *latent_delay_slot = jit_forward();
		jit_patch_at(jit_bnei(JIT_REGISTER_TMP0, 0), latent_delay_slot);

		// Common case.
		// Immediately exit.
		jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
		jit_patch_abs(jit_jmpi(), thunks.return_thunk);

		// If we had a latent delay slot, we handle it here.
		jit_link(latent_delay_slot);
		// We cannot execute a branch inside a delay slot, so just assume we do not have to chain together these.
		// We could technically handle it, but it gets messy (and it's illegal MIPS), so don't bother.
		jit_movi(JIT_REGISTER_NEXT_PC, 0);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
	}
	else if (!last_info.branch)
	{
		// Immediately exit.
		jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
	}
	else
	{
		// We are in the delay slot of a branch; redirect PC to wherever it goes.
		jit_node_t *not_taken = nullptr;
		if (last_info.conditional)
			not_taken = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);

		if (last_info.indirect)
		{
			// Indirect branch, the target lives in a scalar register.
			jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * last_info.branch_target);
		}
		else
		{
			jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
		}

		if (not_taken)
		{
			// Conditional branch which was not taken: resume after the delay slot instead.
			auto *to_end = jit_jmpi();
			jit_patch(not_taken);
			jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
			jit_patch(to_end);
		}
	}

	auto *jmp = jit_jmpi();
	jit_patch_abs(jmp, thunks.return_thunk);
}
|
||||
|
||||
// Emits code which loads scalar register `mips_register` from CPUState::sr into
// the JIT register `jit_register`. Register 0 is never read from memory; a
// constant zero is materialized instead.
void CPU::jit_load_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register)
{
	if (mips_register != 0)
	{
		// Each scalar register occupies 4 bytes in CPUState::sr.
		jit_ldxi_i(jit_register, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_register);
		return;
	}

	jit_movi(jit_register, 0);
}
|
||||
|
||||
// Emits code which writes the JIT register `jit_register` back to scalar
// register `mips_register` in CPUState::sr. Callers must filter out writes to
// register 0 themselves; this is asserted.
void CPU::jit_store_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register)
{
	assert(mips_register != 0);

	// Each scalar register occupies 4 bytes in CPUState::sr.
	const auto sr_offset = offsetof(CPUState, sr) + 4 * mips_register;
	jit_stxi_i(sr_offset, JIT_REGISTER_STATE, jit_register);
}
|
||||
|
||||
// Appends one formatted disassembly line, prefixed with the instruction address,
// to `mips_disasm`. Expects `pc` and `mips_disasm` to be in scope at the point of use.
// At least one variadic argument is required.
// Formatting is bounded with snprintf so an oversized line is truncated rather than
// overrunning the stack buffer.
#define DISASM(asmfmt, ...) do { \
	char buf[1024]; \
	snprintf(buf, sizeof(buf), "0x%03x " asmfmt, pc, __VA_ARGS__); \
	mips_disasm += buf; \
} while(0)

// Appends a "nop" disassembly line for the instruction at `pc` to `mips_disasm`.
#define DISASM_NOP() do { \
	char buf[1024]; \
	snprintf(buf, sizeof(buf), "0x%03x nop\n", pc); \
	mips_disasm += buf; \
} while(0)
|
||||
|
||||
// Emits JIT code for a single scalar-unit instruction and appends its
// disassembly to mips_disasm.
//
// pc                - byte address of the instruction (used for link addresses and disassembly).
// instr             - the raw 32-bit instruction word.
// info              - filled in for this instruction: J/JAL record branch/branch_target,
//                     BREAK sets handles_delay_slot.
// last_info         - description of the previous instruction, forwarded to jit_exit().
// first_instruction - true if this is the first instruction of the block, forwarded to jit_exit().
//
// Vector-unit instructions are skipped entirely. Branches other than J/JAL, COP0/COP2
// moves and all loads/stores currently only produce a disassembly line; no code is
// emitted for them yet.
void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr,
                          InstructionInfo &info, const InstructionInfo &last_info,
                          bool first_instruction)
{
	// VU
	if ((instr >> 25) == 0x25)
	{
		return;
	}

	// TODO: Meaningful register allocation.
	// For now, always flush register state to memory after an instruction for simplicity.
	// Should be red-hot in L1 cache, so probably won't be that bad.
	// On x86, we unfortunately have an anemic register bank to work with.

	uint32_t type = instr >> 26;

	// Writes to register 0 are discarded, so such instructions degenerate to a nop.
#define NOP_IF_RD_ZERO() if (rd == 0) { DISASM_NOP(); break; }
#define NOP_IF_RT_ZERO() if (rt == 0) { DISASM_NOP(); break; }

	switch (type)
	{
	case 000:
	{
		auto rd = (instr >> 11) & 31;
		auto rt = (instr >> 16) & 31;
		auto shift = (instr >> 6) & 31;
		auto rs = (instr >> 21) & 31;

		switch (instr & 63)
		{
		case 000: // SLL
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_lshi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sll r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 002: // SRL
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			// The load sign-extends to the native word size. Clear the upper bits so a
			// logical shift on a 64-bit host does not shift sign bits into the 32-bit result.
			// On a 32-bit host the mask is all ones and has no effect.
			jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(0xffffffffu));
			jit_rshi_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srl r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 003: // SRA
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_rshi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sra r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 004: // SLLV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			// Only the low 5 bits of rs select the shift amount.
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			jit_lshr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sllv r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 006: // SRLV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			// See SRL: drop the sign-extension bits before a logical right shift.
			jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(0xffffffffu));
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			jit_rshr_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srlv r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 007: // SRAV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			jit_rshr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srav r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 010: // JR
			DISASM("jr %u\n", 0);
			break;
		case 011: // JALR
			DISASM("jalr %u\n", 0);
			break;

		case 015: // BREAK
		{
			jit_exit(_jit, pc, last_info, MODE_BREAK, first_instruction);
			info.handles_delay_slot = true;
			DISASM("break %u\n", 0);
			break;
		}

		case 040: // ADD
		case 041: // ADDU
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_addr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("addu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 042: // SUB
		case 043: // SUBU
		{
			NOP_IF_RD_ZERO();
			// rd = rs - rt. Operand order matters here, unlike the commutative ops.
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_subr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("subu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 044: // AND
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_andr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("and r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 045: // OR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_orr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("or r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 046: // XOR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_xorr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("xor r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 047: // NOR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_orr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			// NOR = NOT(OR): invert every bit of the OR result.
			jit_xori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(-1));
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("nor r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 052: // SLT
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_ltr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("slt r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 053: // SLTU
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_ltr_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sltu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		default:
			break;
		}
		break;
	}

	case 001: // REGIMM
	{
		//unsigned rs = (instr >> 21) & 31;
		unsigned rt = (instr >> 16) & 31;

		switch (rt)
		{
		case 020: // BLTZAL
			DISASM("bltzal %u\n", 0);
			break;

		case 000: // BLTZ
			DISASM("bltz %u\n", 0);
			break;

		case 021: // BGEZAL
			DISASM("bgezal %u\n", 0);
			break;

		case 001: // BGEZ
			DISASM("bgez %u\n", 0);
			break;
		}
		break;
	}

	case 003: // JAL
	{
		uint32_t target_pc = (instr & 0x3ffu) << 2;
		// Link register receives the address following the delay slot.
		jit_movi(JIT_REGISTER_TMP0, pc + 8);
		jit_store_register(_jit, JIT_REGISTER_TMP0, 31);
		info.branch = true;
		info.branch_target = target_pc;
		DISASM("jal 0x%03x\n", target_pc);
		break;
	}

	case 002: // J
	{
		uint32_t target_pc = (instr & 0x3ffu) << 2;
		info.branch = true;
		info.branch_target = target_pc;
		DISASM("j 0x%03x\n", target_pc);
		break;
	}

	case 004: // BEQ
		DISASM("beq %u\n", 0);
		break;

	case 005: // BNE
		DISASM("bne %u\n", 0);
		break;

	case 006: // BLEZ
		DISASM("blez %u\n", 0);
		break;

	case 007: // BGTZ
		DISASM("bgtz %u\n", 0);
		break;

	case 010: // ADDI
	case 011: // ADDIU
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t simm = int16_t(instr);
		unsigned rs = (instr >> 21) & 31;

		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_addi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, simm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("addi r%u, r%u, %d\n", rt, rs, simm);
		break;
	}

	case 012: // SLTI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t simm = int16_t(instr);
		unsigned rs = (instr >> 21) & 31;

		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_lti(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, simm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("slti r%u, r%u, %d\n", rt, rs, simm);
		break;
	}

	case 013: // SLTIU
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		uint16_t imm = uint16_t(instr);
		// SLTIU sign-extends its immediate and only then compares as unsigned.
		// The register operand is sign-extended by the load as well, so an
		// unsigned word compare of the two gives the 32-bit unsigned result.
		int16_t simm = int16_t(instr);
		unsigned rs = (instr >> 21) & 31;

		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_lti_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, simm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("sltiu r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 014: // ANDI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("andi r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 015: // ORI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_ori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("ori r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 016: // XORI
	{
		unsigned rt = (instr >> 16) & 31;
		// Same handling as every other op: still emit the nop line in the disassembly.
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_xori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("xori r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 017: // LUI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t imm = int16_t(instr);
		// Shift in the unsigned domain; left-shifting a negative int is undefined before C++20.
		int32_t upper = int32_t(uint32_t(uint16_t(instr)) << 16);
		jit_movi(JIT_REGISTER_TMP0, upper);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("lui r%u, %d\n", rt, imm);
		break;
	}

	case 020: // COP0
		DISASM("cop0 %u\n", 0);
		break;

	case 022: // COP2
		DISASM("cop2 %u\n", 0);
		break;

	case 040: // LB
		DISASM("lb %u\n", 0);
		break;

	case 041: // LH
		DISASM("lh %u\n", 0);
		break;

	case 043: // LW
		DISASM("lw %u\n", 0);
		break;

	case 044: // LBU
		DISASM("lbu %u\n", 0);
		break;

	case 045: // LHU
		DISASM("lhu %u\n", 0);
		break;

	case 050: // SB
		DISASM("sb %u\n", 0);
		break;

	case 051: // SH
		DISASM("sh %u\n", 0);
		break;

	case 053: // SW
		DISASM("sw %u\n", 0);
		break;

	case 062: // LWC2
		DISASM("lwc2 %u\n", 0);
		break;

	case 072: // SWC2
		DISASM("swc2 %u\n", 0);
		break;

	default:
		break;
	}
}
|
||||
|
||||
Func CPU::jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count)
|
||||
{
|
||||
mips_disasm.clear();
|
||||
jit_state_t *_jit = jit_new_state();
|
||||
|
||||
jit_prolog();
|
||||
jit_tramp(JIT_FRAME_SIZE);
|
||||
|
||||
jit_movi(JIT_R0, 10);
|
||||
jit_stxi_i(offsetof(CPUState, sr) + 4, JIT_REGISTER_STATE, JIT_R0);
|
||||
jit_movi(JIT_R0, 20);
|
||||
jit_stxi_i(offsetof(CPUState, sr) + 8, JIT_REGISTER_STATE, JIT_R0);
|
||||
jit_movi(JIT_R0, 30);
|
||||
jit_stxi_i(offsetof(CPUState, sr) + 12, JIT_REGISTER_STATE, JIT_R0);
|
||||
jit_movi(JIT_R0, 40);
|
||||
jit_stxi_i(offsetof(CPUState, sr) + 16, JIT_REGISTER_STATE, JIT_R0);
|
||||
jit_movi(JIT_REGISTER_MODE, MODE_BREAK);
|
||||
jit_movi(JIT_REGISTER_NEXT_PC, 4);
|
||||
auto *jmp = jit_jmpi();
|
||||
jit_patch_abs(jmp, thunks.return_thunk);
|
||||
// We can potentially branch to every instruction in the block, so declare forward references to them here.
|
||||
jit_node_t *branch_targets[CODE_BLOCK_SIZE];
|
||||
for (unsigned i = 0; i < instruction_count; i++)
|
||||
branch_targets[i] = jit_forward();
|
||||
|
||||
jit_node_t *latent_delay_slot = nullptr;
|
||||
|
||||
InstructionInfo last_info = {};
|
||||
for (unsigned i = 0; i < instruction_count; i++)
|
||||
{
|
||||
jit_link(branch_targets[i]);
|
||||
|
||||
uint32_t instr = state.imem[pc_word + i];
|
||||
InstructionInfo inst_info = {};
|
||||
jit_instruction(_jit, (pc_word + i) << 2, instr, inst_info, last_info, i == 0);
|
||||
|
||||
if (i == 0 && !inst_info.handles_delay_slot)
|
||||
{
|
||||
// After the first instruction, we might need to resolve a latent delay slot.
|
||||
latent_delay_slot = jit_forward();
|
||||
jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
|
||||
jit_patch_at(jit_bnei(JIT_REGISTER_TMP0, 0), latent_delay_slot);
|
||||
}
|
||||
else if (i != 0 && !inst_info.handles_delay_slot && last_info.branch)
|
||||
{
|
||||
// Normal handling of the delay slot.
|
||||
jit_handle_delay_slot(_jit, last_info, branch_targets,
|
||||
pc_word << 2,
|
||||
(pc_word + instruction_count) << 2);
|
||||
}
|
||||
last_info = inst_info;
|
||||
}
|
||||
|
||||
// Jump to another block.
|
||||
jit_end_of_block(_jit, (pc_word + instruction_count) << 2, last_info);
|
||||
|
||||
// If we had a latent delay slot, we handle it here.
|
||||
if (latent_delay_slot)
|
||||
{
|
||||
jit_link(latent_delay_slot);
|
||||
// We cannot execute a branch inside a delay slot, so just assume we do not have to chain together these.
|
||||
// We could technically handle it, but it gets messy (and it's illegal MIPS), so don't bother.
|
||||
jit_movi(JIT_REGISTER_NEXT_PC, 0);
|
||||
jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
|
||||
jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
|
||||
jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
|
||||
}
|
||||
|
||||
auto ret = reinterpret_cast<Func>(jit_emit());
|
||||
|
||||
printf(" === DISASM ===\n");
|
||||
jit_disassemble();
|
||||
printf("%s\n", mips_disasm.c_str());
|
||||
printf(" === DISASM END ===\n\n");
|
||||
cleanup_jit_states.push_back(_jit);
|
||||
return ret;
|
||||
@ -328,5 +958,52 @@ ReturnMode CPU::run()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const char *reg_names[32] = {
|
||||
"zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
|
||||
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
|
||||
};
|
||||
#define NAME(reg) reg_names[reg]
|
||||
|
||||
void CPU::print_registers()
|
||||
{
|
||||
fprintf(stderr, "RSP state:\n");
|
||||
fprintf(stderr, " PC: 0x%03x\n", state.pc);
|
||||
for (unsigned i = 1; i < 32; i++)
|
||||
fprintf(stderr, " SR[%s] = 0x%08x\n", NAME(i), state.sr[i]);
|
||||
fprintf(stderr, "\n");
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
{
|
||||
fprintf(stderr, " VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
|
||||
state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
|
||||
state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
{
|
||||
static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
|
||||
fprintf(stderr, " %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
|
||||
state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
|
||||
state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
|
||||
state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
{
|
||||
static const char *strings[] = { "VCO", "VCC", "VCE" };
|
||||
uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
|
||||
fprintf(stderr, " %s = 0x%04x\n", strings[i], flags);
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " Div Out = 0x%04x\n", state.cp2.div_out);
|
||||
fprintf(stderr, " Div In = 0x%04x\n", state.cp2.div_in);
|
||||
fprintf(stderr, " DP flag = 0x%04x\n", state.cp2.dp_flag);
|
||||
}
|
||||
|
||||
} // namespace JIT
|
||||
} // namespace RSP
|
19
rsp_jit.hpp
19
rsp_jit.hpp
@ -73,7 +73,7 @@ private:
|
||||
|
||||
std::unordered_map<uint64_t, Func> cached_blocks[IMEM_WORDS];
|
||||
|
||||
Func jit_region(uint64_t hash, unsigned pc, unsigned count);
|
||||
Func jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count);
|
||||
|
||||
int enter(uint32_t pc);
|
||||
|
||||
@ -90,6 +90,23 @@ private:
|
||||
} thunks;
|
||||
|
||||
unsigned analyze_static_end(unsigned pc, unsigned end);
|
||||
|
||||
// Per-instruction facts gathered while compiling a block. The code generator
// consults the info of the previous instruction to resolve branch delay slots.
// All fields default to "not a branch", so a default-constructed value is safe to use.
struct InstructionInfo
{
	// Direct branch: destination byte address.
	// Indirect branch: index of the scalar register which holds the destination.
	uint32_t branch_target = 0;
	// True if branch_target names a register rather than an address.
	bool indirect = false;
	// True if the instruction is a branch or jump (i.e. the next instruction is its delay slot).
	bool branch = false;
	// True if the branch is only taken when JIT_REGISTER_COND_BRANCH_TAKEN is non-zero.
	bool conditional = false;
	// True if the emitted code for this instruction already resolved any pending
	// delay slot itself (e.g. BREAK exiting through jit_exit()).
	bool handles_delay_slot = false;
};
|
||||
void jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, InstructionInfo &info, const InstructionInfo &last_info,
|
||||
bool first_instruction);
|
||||
void jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, ReturnMode mode, bool first_instruction);
|
||||
void jit_end_of_block(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info);
|
||||
static void jit_load_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register);
|
||||
static void jit_store_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register);
|
||||
void jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info, jit_node_t **local_targets, uint32_t base_pc, uint32_t end_pc);
|
||||
std::string mips_disasm;
|
||||
};
|
||||
} // namespace JIT
|
||||
} // namespace RSP
|
||||
|
Loading…
Reference in New Issue
Block a user