JIT out some code, it's a start.

This commit is contained in:
Hans-Kristian Arntzen 2020-01-30 14:50:20 +01:00
parent 350ca7d46c
commit c8bdf5023e
3 changed files with 716 additions and 18 deletions

View File

@ -30,6 +30,7 @@ static vector<uint32_t> read_binary(const char *path, bool flip)
return v;
}
#if 0
static bool read_tag_validate(FILE *file, const char *tag)
{
char tmp[9] = {};
@ -289,10 +290,11 @@ static void validate_trace(RSP::CPU &cpu, const char *path)
fclose(file);
}
#endif
int main(int argc, char *argv[])
{
RSP::CPU cpu;
RSP::JIT::CPU cpu;
auto &state = cpu.get_state();
uint32_t cr[16] = {};
@ -318,8 +320,10 @@ int main(int argc, char *argv[])
cpu.run();
}
}
#if 0
else if (argc == 2)
validate_trace(cpu, argv[1]);
#endif
else
return 1;
}

View File

@ -1,12 +1,23 @@
#include "rsp_jit.hpp"
#include <utility>
#include <assert.h>
using namespace std;
// Fixed host-register assignments used by all generated code in this file.
// We're only guaranteed 3 V registers (x86).
// SELF and STATE receive the thunk arguments of the same name, and DMEM is
// loaded from CPUState::dmem when a frame is entered.
#define JIT_REGISTER_SELF JIT_V0
#define JIT_REGISTER_STATE JIT_V1
#define JIT_REGISTER_DMEM JIT_V2
// MODE and NEXT_PC carry the return mode and the next program counter
// when control is handed to the enter/return thunks.
#define JIT_REGISTER_MODE JIT_R1
#define JIT_REGISTER_NEXT_PC JIT_R0
// Freely used to implement instructions.
// Note that TMP0 aliases NEXT_PC (both JIT_R0) and TMP1 aliases MODE (both JIT_R1),
// so NEXT_PC/MODE must be (re)loaded after the temporaries were last used.
#define JIT_REGISTER_TMP0 JIT_R0
#define JIT_REGISTER_TMP1 JIT_R1
// We're only guaranteed 3 R registers (x86).
// The highest-numbered R register holds the "conditional branch taken" flag
// (compared against zero by the delay slot / exit helpers).
#define JIT_REGISTER_COND_BRANCH_TAKEN JIT_R(JIT_R_NUM - 1)
// Frame size passed to jit_tramp() for every generated block.
#define JIT_FRAME_SIZE 256
namespace RSP
@ -72,6 +83,9 @@ uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
{
// Scans through IMEM and finds the logical "end" of the instruction stream.
// A logical end of the instruction stream is where execution must terminate.
// If we have forward branches into this block, i.e. gotos, they extend the execution stream.
// However, we cannot execute beyond end.
unsigned max_static_pc = pc;
unsigned count = end - pc;
@ -100,7 +114,8 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
switch (instr & 63)
{
case 010:
// JR always terminates either by returning or exiting.
case 011:
// JR and JALR always terminate execution of the block.
// We execute the next instruction via delay slot and exit.
// Unless we can branch past the JR
// (max_static_pc will be higher than expected),
@ -130,6 +145,7 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
case 001: // BGEZ
case 021: // BGEZAL
case 020: // BLTZAL
// TODO/Optimization: Handle static branch case where $0 is used.
target = (pc + i + 1 + instr) & 0x3ff;
if (target >= pc && target < end) // goto
max_static_pc = max(max_static_pc, target + 1);
@ -140,8 +156,9 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
}
break;
case 002:
// J is resolved by goto.
case 002: // J
case 003: // JAL
// J is resolved by goto. Same with JAL if call target happens to be inside the block.
target = instr & 0x3ff;
if (target >= pc && target < end) // goto
{
@ -169,6 +186,7 @@ unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
case 005: // BNE
case 006: // BLEZ
case 007: // BGTZ
// TODO/Optimization: Handle static branch case where $0 is used.
target = (pc + i + 1 + instr) & 0x3ff;
if (target >= pc && target < end) // goto
max_static_pc = max(max_static_pc, target + 1);
@ -207,6 +225,7 @@ void CPU::init_jit_thunks()
jit_getarg(JIT_REGISTER_SELF, self);
jit_getarg(JIT_REGISTER_STATE, state);
jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
jit_ldxi(JIT_REGISTER_DMEM, JIT_REGISTER_STATE, offsetof(CPUState, dmem));
jit_movi(JIT_REGISTER_MODE, MODE_ENTER);
auto *entry_label = jit_indirect();
@ -272,30 +291,641 @@ int CPU::enter(uint32_t pc)
return thunks.enter_frame(this, &state);
}
Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
void CPU::jit_end_of_block(jit_state_t *_jit, uint32_t pc, const CPU::InstructionInfo &last_info)
{
	// Emitted when execution falls off the end of a compiled block.
	// A branch issued by the very last instruction still has its delay slot pending,
	// so the branch is parked in CPUState (branch_target + has_delay_slot) and gets
	// resolved after the first instruction of the following block.
	// In every case we continue at `pc`, the address right after this block.
	jit_node_t *continue_label = jit_forward();

	if (last_info.branch)
	{
		// A conditional branch that was not taken leaves nothing to park.
		if (last_info.conditional)
		{
			jit_node_t *not_taken = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);
			jit_patch_at(not_taken, continue_label);
		}

		// Materialize the branch target: a constant for direct branches,
		// the contents of scalar register sr[branch_target] for indirect ones.
		if (!last_info.indirect)
		{
			jit_movi(JIT_REGISTER_TMP0, last_info.branch_target);
		}
		else
		{
			const auto source_register = offsetof(CPUState, sr) + 4 * last_info.branch_target;
			jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, source_register);
		}
		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, JIT_REGISTER_TMP0);

		// Flag the pending delay slot for the next block.
		jit_movi(JIT_REGISTER_TMP0, 1);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_TMP0);
	}

	jit_link(continue_label);

	// Chain into whatever block starts at `pc`.
	jit_movi(JIT_REGISTER_NEXT_PC, pc);
	jit_node_t *chain_jump = jit_jmpi();
	jit_patch_abs(chain_jump, thunks.enter_thunk);
}
// Emits the control transfer for a branch whose delay slot instruction has just been emitted.
//
// last_info     - description of the branch instruction preceding the delay slot.
// local_targets - one label per instruction of the block being compiled, indexed by
//                 (byte address - base_pc) >> 2.
// base_pc       - byte address of the first instruction in the block.
// end_pc        - byte address one past the last instruction in the block.
//
// Direct branches that land inside [base_pc, end_pc) jump straight to the local label.
// Everything else loads the target into JIT_REGISTER_NEXT_PC and leaves through the enter thunk.
// For conditional branches JIT_REGISTER_COND_BRANCH_TAKEN is non-zero when the branch is taken.
void CPU::jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info,
                                jit_node_t **local_targets, uint32_t base_pc, uint32_t end_pc)
{
	const bool targets_this_block = !last_info.indirect &&
	                                last_info.branch_target >= base_pc &&
	                                last_info.branch_target < end_pc;

	if (targets_this_block)
	{
		jit_node_t *local_label = local_targets[(last_info.branch_target - base_pc) >> 2];
		if (last_info.conditional)
			jit_patch_at(jit_bnei(JIT_REGISTER_COND_BRANCH_TAKEN, 0), local_label);
		else
			jit_patch_at(jit_jmpi(), local_label);
		return;
	}

	// Leaving the block. For a conditional branch, skip the exit sequence when the
	// branch was NOT taken (flag == 0). The previous code tested for != 0 here, which
	// inverted the condition: taken branches fell through and untaken ones left the block.
	jit_node_t *no_branch = nullptr;
	if (last_info.conditional)
		no_branch = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);

	// Indirect targets live in scalar register sr[branch_target]; direct ones are constants.
	if (last_info.indirect)
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * last_info.branch_target);
	else
		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);

	// Not-taken conditional branches resume with the instruction after the delay slot.
	if (no_branch)
		jit_patch(no_branch);
}
// Emits code that leaves JIT-compiled code through the return thunk.
//
// pc                - byte address of the instruction requesting the exit.
// last_info         - description of the previous instruction; if it was a branch,
//                     the exiting instruction sits in its delay slot.
// mode              - value handed back in JIT_REGISTER_MODE.
// first_instruction - true when the exiting instruction is the first one of its block,
//                     in which case a branch may still be pending in CPUState.
//
// On exit JIT_REGISTER_NEXT_PC holds the address execution should resume from and
// JIT_REGISTER_MODE holds `mode` on every path. Previously the conditional-branch paths
// jumped to the return thunk without ever loading JIT_REGISTER_MODE.
void CPU::jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, ReturnMode mode, bool first_instruction)
{
	if (first_instruction)
	{
		// Need to consider that we need to move delay slot to PC.
		jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
		auto *latent_delay_slot = jit_forward();
		jit_patch_at(jit_bnei(JIT_REGISTER_TMP0, 0), latent_delay_slot);

		// Common case.
		// Immediately exit.
		jit_movi(JIT_REGISTER_MODE, mode);
		jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
		jit_patch_abs(jit_jmpi(), thunks.return_thunk);

		// If we had a latent delay slot, we handle it here.
		jit_link(latent_delay_slot);

		// We cannot execute a branch inside a delay slot, so just assume we do not have to chain together these.
		// We could technically handle it, but it gets messy (and it's illegal MIPS), so don't bother.
		jit_movi(JIT_REGISTER_NEXT_PC, 0);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
	}
	else if (!last_info.branch)
	{
		// No pending branch, simply resume after this instruction.
		jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
	}
	else
	{
		// We sit in the delay slot of a branch; redirect PC to wherever it goes.
		// A conditional branch which was not taken resumes at pc + 4 instead.
		jit_node_t *not_taken = nullptr;
		if (last_info.conditional)
			not_taken = jit_beqi(JIT_REGISTER_COND_BRANCH_TAKEN, 0);

		// Indirect branches read the target from scalar register sr[branch_target].
		if (last_info.indirect)
			jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * last_info.branch_target);
		else
			jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);

		if (not_taken)
		{
			auto *to_end = jit_jmpi();
			jit_patch(not_taken);
			jit_movi(JIT_REGISTER_NEXT_PC, pc + 4);
			jit_patch(to_end);
		}
	}

	// Shared tail: every path that reaches this point reports `mode`.
	// MODE and NEXT_PC are distinct host registers, so the order of the two loads does not matter.
	jit_movi(JIT_REGISTER_MODE, mode);
	auto *jmp = jit_jmpi();
	jit_patch_abs(jmp, thunks.return_thunk);
}
// Emits a load of scalar register `mips_register` into host register `jit_register`.
// Register 0 is never read from CPUState; a constant zero is materialized instead.
void CPU::jit_load_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register)
{
	if (mips_register != 0)
	{
		// Scalar registers are stored as consecutive 32-bit words starting at CPUState::sr.
		const auto sr_offset = offsetof(CPUState, sr) + 4 * mips_register;
		jit_ldxi_i(jit_register, JIT_REGISTER_STATE, sr_offset);
		return;
	}

	jit_movi(jit_register, 0);
}
// Emits a store of host register `jit_register` into scalar register `mips_register`.
// Writing register 0 is a caller bug: callers filter it out beforehand
// (see the NOP_IF_RD_ZERO / NOP_IF_RT_ZERO guards in jit_instruction).
void CPU::jit_store_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register)
{
	assert(mips_register != 0);

	// Scalar registers are stored as consecutive 32-bit words starting at CPUState::sr.
	const auto sr_offset = offsetof(CPUState, sr) + 4 * mips_register;
	jit_stxi_i(sr_offset, JIT_REGISTER_STATE, jit_register);
}
// Appends one formatted disassembly line, prefixed with the current `pc`, to `mips_disasm`.
// Expects `pc` and `mips_disasm` to be in scope at the expansion site.
// At least one variadic argument is required; use DISASM_NOP() for argument-free lines.
// snprintf bounds the write so an unexpectedly long line is truncated instead of overflowing buf.
#define DISASM(asmfmt, ...) do { \
	char buf[1024]; \
	snprintf(buf, sizeof(buf), "0x%03x " asmfmt, pc, __VA_ARGS__); \
	mips_disasm += buf; \
} while(0)
// Appends a "nop" disassembly line for the current `pc` to `mips_disasm`.
#define DISASM_NOP() do { \
	char buf[1024]; \
	snprintf(buf, sizeof(buf), "0x%03x nop\n", pc); \
	mips_disasm += buf; \
} while(0)
// Emits host code for a single RSP scalar instruction and appends its disassembly to mips_disasm.
//
// pc                - byte address of the instruction.
// instr             - raw 32-bit instruction word.
// info              - filled in with branch metadata for this instruction (consumed by the caller
//                     to resolve the delay slot after the following instruction).
// last_info         - metadata of the previously emitted instruction.
// first_instruction - true if this is the first instruction of the block.
//
// Only ALU operations, J/JAL and BREAK generate code so far. Register jumps, conditional
// branches, COP0/COP2 and loads/stores are merely disassembled, and vector unit
// operations are skipped entirely.
void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr,
                          InstructionInfo &info, const InstructionInfo &last_info,
                          bool first_instruction)
{
	// VU
	// COP2 encodings with bit 25 set; nothing is emitted for them yet.
	if ((instr >> 25) == 0x25)
	{
		return;
	}

	// TODO: Meaningful register allocation.
	// For now, always flush register state to memory after an instruction for simplicity.
	// Should be red-hot in L1 cache, so probably won't be that bad.
	// On x86, we unfortunately have an anemic register bank to work with.
	uint32_t type = instr >> 26;

	// Writes to register 0 are discarded, so such instructions degenerate to a nop.
#define NOP_IF_RD_ZERO() if (rd == 0) { DISASM_NOP(); break; }
#define NOP_IF_RT_ZERO() if (rt == 0) { DISASM_NOP(); break; }

	switch (type)
	{
	case 000:
	{
		auto rd = (instr >> 11) & 31;
		auto rt = (instr >> 16) & 31;
		auto shift = (instr >> 6) & 31;
		auto rs = (instr >> 21) & 31;

		switch (instr & 63)
		{
		case 000: // SLL
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_lshi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sll r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 002: // SRL
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			// jit_load_register sign-extends into the host word. On 64-bit hosts a logical
			// shift would pull those sign bits into the low 32 bits, so clear them first.
			// On 32-bit hosts the mask is all ones and changes nothing.
			jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(0xffffffffu));
			jit_rshi_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srl r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 003: // SRA
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_rshi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, shift);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sra r%u, r%u, %u\n", rd, rt, shift);
			break;
		}

		case 004: // SLLV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			// Only the low 5 bits of rs select the shift amount.
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			jit_lshr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sllv r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 006: // SRLV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			// Same zero-extension as SRL before shifting logically.
			jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(0xffffffffu));
			jit_rshr_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srlv r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 007: // SRAV
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rt);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rs);
			jit_andi(JIT_REGISTER_TMP1, JIT_REGISTER_TMP1, 31);
			jit_rshr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("srav r%u, r%u, r%u\n", rd, rt, rs);
			break;
		}

		case 010: // JR
			// Not implemented yet, disassembly only.
			DISASM("jr %u\n", 0);
			break;

		case 011: // JALR
			// Not implemented yet, disassembly only.
			DISASM("jalr %u\n", 0);
			break;

		case 015: // BREAK
		{
			jit_exit(_jit, pc, last_info, MODE_BREAK, first_instruction);
			// jit_exit already resolves any pending branch, the caller must not do it again.
			info.handles_delay_slot = true;
			DISASM("break %u\n", 0);
			break;
		}

		case 040: // ADD
		case 041: // ADDU
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_addr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("addu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 042: // SUB
		case 043: // SUBU
		{
			NOP_IF_RD_ZERO();
			// rd = rs - rt. The operands were previously swapped, yielding rt - rs.
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_subr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("subu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 044: // AND
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_andr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("and r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 045: // OR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_orr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("or r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 046: // XOR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_xorr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("xor r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 047: // NOR
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_orr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			// NOR = ~(rs | rt).
			jit_xori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(-1));
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("nor r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 052: // SLT
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_ltr(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("slt r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		case 053: // SLTU
		{
			NOP_IF_RD_ZERO();
			jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
			jit_load_register(_jit, JIT_REGISTER_TMP1, rt);
			jit_ltr_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, JIT_REGISTER_TMP1);
			jit_store_register(_jit, JIT_REGISTER_TMP0, rd);
			DISASM("sltu r%u, r%u, r%u\n", rd, rs, rt);
			break;
		}

		default:
			break;
		}
		break;
	}

	case 001: // REGIMM
	{
		// Not implemented yet, disassembly only.
		//unsigned rs = (instr >> 21) & 31;
		unsigned rt = (instr >> 16) & 31;

		switch (rt)
		{
		case 020: // BLTZAL
			DISASM("bltzal %u\n", 0);
			break;

		case 000: // BLTZ
			DISASM("bltz %u\n", 0);
			break;

		case 021: // BGEZAL
			DISASM("bgezal %u\n", 0);
			break;

		case 001: // BGEZ
			DISASM("bgez %u\n", 0);
			break;
		}
		break;
	}

	case 003: // JAL
	{
		// The 10-bit word index is turned into a byte address.
		uint32_t target_pc = (instr & 0x3ffu) << 2;
		// Link register receives the address following the delay slot.
		jit_movi(JIT_REGISTER_TMP0, pc + 8);
		jit_store_register(_jit, JIT_REGISTER_TMP0, 31);
		info.branch = true;
		info.branch_target = target_pc;
		DISASM("jal 0x%03x\n", target_pc);
		break;
	}

	case 002: // J
	{
		uint32_t target_pc = (instr & 0x3ffu) << 2;
		info.branch = true;
		info.branch_target = target_pc;
		DISASM("j 0x%03x\n", target_pc);
		break;
	}

	// Conditional branches are not implemented yet, disassembly only.
	case 004: // BEQ
		DISASM("beq %u\n", 0);
		break;

	case 005: // BNE
		DISASM("bne %u\n", 0);
		break;

	case 006: // BLEZ
		DISASM("blez %u\n", 0);
		break;

	case 007: // BGTZ
		DISASM("bgtz %u\n", 0);
		break;

	case 010: // ADDI
	case 011: // ADDIU
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t simm = int16_t(instr);
		unsigned rs = (instr >> 21) & 31;
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_addi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, simm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("addi r%u, r%u, %d\n", rt, rs, simm);
		break;
	}

	case 012: // SLTI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t simm = int16_t(instr);
		unsigned rs = (instr >> 21) & 31;
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_lti(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, simm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("slti r%u, r%u, %d\n", rt, rs, simm);
		break;
	}

	case 013: // SLTIU
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		uint16_t imm = uint16_t(instr);
		unsigned rs = (instr >> 21) & 31;
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		// MIPS sign-extends the immediate and only then compares as unsigned.
		// The register is sign-extended to the host word as well, so widening the
		// immediate the same way keeps the unsigned ordering consistent.
		jit_lti_u(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, jit_word_t(int16_t(instr)));
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("sltiu r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 014: // ANDI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_andi(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("andi r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 015: // ORI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_ori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("ori r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 016: // XORI
	{
		unsigned rt = (instr >> 16) & 31;
		// Use the common guard so the disassembly also gets its nop line.
		NOP_IF_RT_ZERO();
		unsigned rs = (instr >> 21) & 31;
		uint16_t imm = uint16_t(instr);
		jit_load_register(_jit, JIT_REGISTER_TMP0, rs);
		jit_xori(JIT_REGISTER_TMP0, JIT_REGISTER_TMP0, imm);
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("xori r%u, r%u, %u\n", rt, rs, imm);
		break;
	}

	case 017: // LUI
	{
		unsigned rt = (instr >> 16) & 31;
		NOP_IF_RT_ZERO();
		int16_t imm = int16_t(instr);
		// Shift in the unsigned domain: left-shifting a negative int is undefined behaviour.
		// The result is the same sign-extended 32-bit value as before.
		jit_movi(JIT_REGISTER_TMP0, int32_t(uint32_t(uint16_t(instr)) << 16));
		jit_store_register(_jit, JIT_REGISTER_TMP0, rt);
		DISASM("lui r%u, %d\n", rt, imm);
		break;
	}

	// Coprocessor moves and memory accesses are not implemented yet, disassembly only.
	case 020: // COP0
		DISASM("cop0 %u\n", 0);
		break;

	case 022: // COP2
		DISASM("cop2 %u\n", 0);
		break;

	case 040: // LB
		DISASM("lb %u\n", 0);
		break;

	case 041: // LH
		DISASM("lh %u\n", 0);
		break;

	case 043: // LW
		DISASM("lw %u\n", 0);
		break;

	case 044: // LBU
		DISASM("lbu %u\n", 0);
		break;

	case 045: // LHU
		DISASM("lhu %u\n", 0);
		break;

	case 050: // SB
		DISASM("sb %u\n", 0);
		break;

	case 051: // SH
		DISASM("sh %u\n", 0);
		break;

	case 053: // SW
		DISASM("sw %u\n", 0);
		break;

	case 062: // LWC2
		DISASM("lwc2 %u\n", 0);
		break;

	case 072: // SWC2
		DISASM("swc2 %u\n", 0);
		break;

	default:
		break;
	}
}
Func CPU::jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count)
{
mips_disasm.clear();
jit_state_t *_jit = jit_new_state();
jit_prolog();
jit_tramp(JIT_FRAME_SIZE);
jit_movi(JIT_R0, 10);
jit_stxi_i(offsetof(CPUState, sr) + 4, JIT_REGISTER_STATE, JIT_R0);
jit_movi(JIT_R0, 20);
jit_stxi_i(offsetof(CPUState, sr) + 8, JIT_REGISTER_STATE, JIT_R0);
jit_movi(JIT_R0, 30);
jit_stxi_i(offsetof(CPUState, sr) + 12, JIT_REGISTER_STATE, JIT_R0);
jit_movi(JIT_R0, 40);
jit_stxi_i(offsetof(CPUState, sr) + 16, JIT_REGISTER_STATE, JIT_R0);
jit_movi(JIT_REGISTER_MODE, MODE_BREAK);
jit_movi(JIT_REGISTER_NEXT_PC, 4);
auto *jmp = jit_jmpi();
jit_patch_abs(jmp, thunks.return_thunk);
// We can potentially branch to every instruction in the block, so declare forward references to them here.
jit_node_t *branch_targets[CODE_BLOCK_SIZE];
for (unsigned i = 0; i < instruction_count; i++)
branch_targets[i] = jit_forward();
jit_node_t *latent_delay_slot = nullptr;
InstructionInfo last_info = {};
for (unsigned i = 0; i < instruction_count; i++)
{
jit_link(branch_targets[i]);
uint32_t instr = state.imem[pc_word + i];
InstructionInfo inst_info = {};
jit_instruction(_jit, (pc_word + i) << 2, instr, inst_info, last_info, i == 0);
if (i == 0 && !inst_info.handles_delay_slot)
{
// After the first instruction, we might need to resolve a latent delay slot.
latent_delay_slot = jit_forward();
jit_ldxi_i(JIT_REGISTER_TMP0, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
jit_patch_at(jit_bnei(JIT_REGISTER_TMP0, 0), latent_delay_slot);
}
else if (i != 0 && !inst_info.handles_delay_slot && last_info.branch)
{
// Normal handling of the delay slot.
jit_handle_delay_slot(_jit, last_info, branch_targets,
pc_word << 2,
(pc_word + instruction_count) << 2);
}
last_info = inst_info;
}
// Jump to another block.
jit_end_of_block(_jit, (pc_word + instruction_count) << 2, last_info);
// If we had a latent delay slot, we handle it here.
if (latent_delay_slot)
{
jit_link(latent_delay_slot);
// We cannot execute a branch inside a delay slot, so just assume we do not have to chain together these.
// We could technically handle it, but it gets messy (and it's illegal MIPS), so don't bother.
jit_movi(JIT_REGISTER_NEXT_PC, 0);
jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
}
auto ret = reinterpret_cast<Func>(jit_emit());
printf(" === DISASM ===\n");
jit_disassemble();
printf("%s\n", mips_disasm.c_str());
printf(" === DISASM END ===\n\n");
cleanup_jit_states.push_back(_jit);
return ret;
@ -328,5 +958,52 @@ ReturnMode CPU::run()
}
}
}
// Symbolic names for the 32 scalar registers, indexed by register number.
// Used by print_registers() to label the SR dump.
static const char *reg_names[32] = {
"zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
};
// Maps a scalar register number to its symbolic name. No bounds check is performed.
#define NAME(reg) reg_names[reg]
void CPU::print_registers()
{
fprintf(stderr, "RSP state:\n");
fprintf(stderr, " PC: 0x%03x\n", state.pc);
for (unsigned i = 1; i < 32; i++)
fprintf(stderr, " SR[%s] = 0x%08x\n", NAME(i), state.sr[i]);
fprintf(stderr, "\n");
for (unsigned i = 0; i < 32; i++)
{
fprintf(stderr, " VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
}
fprintf(stderr, "\n");
for (unsigned i = 0; i < 3; i++)
{
static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
fprintf(stderr, " %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
}
fprintf(stderr, "\n");
for (unsigned i = 0; i < 3; i++)
{
static const char *strings[] = { "VCO", "VCC", "VCE" };
uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
fprintf(stderr, " %s = 0x%04x\n", strings[i], flags);
}
fprintf(stderr, "\n");
fprintf(stderr, " Div Out = 0x%04x\n", state.cp2.div_out);
fprintf(stderr, " Div In = 0x%04x\n", state.cp2.div_in);
fprintf(stderr, " DP flag = 0x%04x\n", state.cp2.dp_flag);
}
} // namespace JIT
} // namespace RSP

View File

@ -73,7 +73,7 @@ private:
std::unordered_map<uint64_t, Func> cached_blocks[IMEM_WORDS];
Func jit_region(uint64_t hash, unsigned pc, unsigned count);
Func jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count);
int enter(uint32_t pc);
@ -90,6 +90,23 @@ private:
} thunks;
unsigned analyze_static_end(unsigned pc, unsigned end);
// Per-instruction metadata produced by jit_instruction() and consumed while emitting
// the following instruction (delay slot handling) and the end of the block.
struct InstructionInfo
{
// For direct branches: the byte address to branch to.
// For indirect branches: the index of the scalar register holding the target address.
uint32_t branch_target;
// True if branch_target names a scalar register rather than an address.
bool indirect;
// True if this instruction is a branch or jump.
bool branch;
// True if the branch is only taken when JIT_REGISTER_COND_BRANCH_TAKEN is non-zero at runtime.
bool conditional;
// True if the emitted code already resolved any pending delay slot itself (set for BREAK),
// so the caller must not emit delay slot handling after this instruction.
bool handles_delay_slot;
};
void jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, InstructionInfo &info, const InstructionInfo &last_info,
bool first_instruction);
void jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, ReturnMode mode, bool first_instruction);
void jit_end_of_block(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info);
static void jit_load_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register);
static void jit_store_register(jit_state_t *_jit, unsigned jit_register, unsigned mips_register);
void jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info, jit_node_t **local_targets, uint32_t base_pc, uint32_t end_pc);
std::string mips_disasm;
};
} // namespace JIT
} // namespace RSP