riscv: Use jit address offsets directly.

This lets the IR use either a block number or a target offset as the block cookie.
This commit is contained in:
Unknown W. Brackets 2023-07-27 22:23:06 -07:00
parent b473f1e649
commit 8d60c10a64
5 changed files with 78 additions and 95 deletions

View File

@ -85,7 +85,8 @@ void IRJit::Compile(u32 em_address) {
if (block_num != -1) {
IRBlock *b = blocks_.GetBlock(block_num);
// Okay, let's link and finalize the block now.
b->Finalize(block_num);
int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
b->Finalize(cookie);
if (b->IsValid()) {
// Success, we're done.
return;
@ -128,13 +129,13 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
b->SetOriginalSize(mipsBytes);
if (preload) {
// Hash, then only update page stats, don't link yet.
b->UpdateHash();
blocks_.FinalizeBlock(block_num, true);
} else {
// Overwrites the first instruction, and also updates stats.
// TODO: Should we always hash? Then we can reuse blocks.
blocks_.FinalizeBlock(block_num);
b->UpdateHash();
}
if (!CompileTargetBlock(b, block_num, preload))
return false;
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num, preload);
return true;
}
@ -264,7 +265,8 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
void IRBlockCache::Clear() {
for (int i = 0; i < (int)blocks_.size(); ++i) {
blocks_[i].Destroy(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
blocks_.clear();
byPage_.clear();
@ -283,7 +285,8 @@ void IRBlockCache::InvalidateICache(u32 address, u32 length) {
for (int i : blocksInPage) {
if (blocks_[i].OverlapsRange(address, length)) {
// Not removing from the page, hopefully doesn't build up with small recompiles.
blocks_[i].Destroy(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
}
}
@ -291,7 +294,8 @@ void IRBlockCache::InvalidateICache(u32 address, u32 length) {
void IRBlockCache::FinalizeBlock(int i, bool preload) {
if (!preload) {
blocks_[i].Finalize(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Finalize(cookie);
}
u32 startAddr, size;
@ -331,13 +335,30 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
return -1;
}
// Maps an emuhack cookie back to an index into blocks_.
// The cookie is either a block number or a block's target offset, mirroring how
// callers pick it (GetTargetOffset() < 0 ? block number : GetTargetOffset()).
// Returns -1 when the cache is empty or no block has a matching target offset.
int IRBlockCache::FindByCookie(int cookie) {
if (blocks_.empty())
return -1;
// TODO: Maybe a flag to determine target offset mode?
// The first block decides the mode: a negative target offset there means the
// cookie is already a block number, so it is returned as-is (no range check here).
if (blocks_[0].GetTargetOffset() < 0)
return cookie;
// Target offset mode: scan every block for one whose target offset equals the cookie.
for (int i = 0; i < GetNumBlocks(); ++i) {
int offset = blocks_[i].GetTargetOffset();
if (offset == cookie)
return i;
}
return -1;
}
std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
std::vector<u32> result;
result.resize(blocks_.size());
for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
if (b.IsValid() && b.RestoreOriginalFirstOp(number)) {
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
result[number] = number;
} else {
result[number] = 0;
@ -357,7 +378,8 @@ void IRBlockCache::RestoreSavedEmuHackOps(std::vector<u32> saved) {
IRBlock &b = blocks_[number];
// Only if we restored it, write it back.
if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
b.Finalize(number);
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
b.Finalize(cookie);
}
}
}
@ -441,8 +463,8 @@ bool IRBlock::HasOriginalFirstOp() const {
return Memory::ReadUnchecked_U32(origAddr_) == origFirstOpcode_.encoding;
}
bool IRBlock::RestoreOriginalFirstOp(int number) {
const u32 emuhack = MIPS_EMUHACK_OPCODE | number;
bool IRBlock::RestoreOriginalFirstOp(int cookie) {
const u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;
if (Memory::ReadUnchecked_U32(origAddr_) == emuhack) {
Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
return true;
@ -450,19 +472,19 @@ bool IRBlock::RestoreOriginalFirstOp(int number) {
return false;
}
void IRBlock::Finalize(int number) {
void IRBlock::Finalize(int cookie) {
// Check it wasn't invalidated, in case this is after preload.
// TODO: Allow reusing blocks when the code matches hash_ again, instead.
if (origAddr_) {
origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
Memory::Write_Opcode_JIT(origAddr_, opcode);
}
}
void IRBlock::Destroy(int number) {
void IRBlock::Destroy(int cookie) {
if (origAddr_) {
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
if (Memory::ReadUnchecked_U32(origAddr_) == opcode.encoding)
Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
@ -496,7 +518,7 @@ bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
}
MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
IRBlock *b = blocks_.GetBlock(op.encoding & 0xFFFFFF);
IRBlock *b = blocks_.GetBlock(blocks_.FindByCookie(op.encoding & 0xFFFFFF));
if (b) {
return b->GetOriginalFirstOp();
}

View File

@ -38,15 +38,16 @@ namespace MIPSComp {
// TODO : Use arena allocators. For now let's just malloc.
class IRBlock {
public:
IRBlock() : instr_(nullptr), numInstructions_(0), origAddr_(0), origSize_(0) {}
IRBlock(u32 emAddr) : instr_(nullptr), numInstructions_(0), origAddr_(emAddr), origSize_(0) {}
IRBlock() {}
IRBlock(u32 emAddr) : origAddr_(emAddr) {}
IRBlock(IRBlock &&b) {
instr_ = b.instr_;
numInstructions_ = b.numInstructions_;
hash_ = b.hash_;
origAddr_ = b.origAddr_;
origSize_ = b.origSize_;
origFirstOpcode_ = b.origFirstOpcode_;
hash_ = b.hash_;
targetOffset_ = b.targetOffset_;
numInstructions_ = b.numInstructions_;
b.instr_ = nullptr;
}
@ -71,6 +72,12 @@ public:
void SetOriginalSize(u32 size) {
origSize_ = size;
}
// Stores the given offset in targetOffset_ for later retrieval via GetTargetOffset().
void SetTargetOffset(int offset) {
targetOffset_ = offset;
}
// Returns the stored target offset. Callers in this file treat a negative value
// as "no target offset" and fall back to the block number instead.
int GetTargetOffset() const {
return targetOffset_;
}
void UpdateHash() {
hash_ = CalculateHash();
}
@ -90,12 +97,13 @@ public:
private:
u64 CalculateHash() const;
IRInst *instr_;
u16 numInstructions_;
u32 origAddr_;
u32 origSize_;
IRInst *instr_ = nullptr;
u64 hash_ = 0;
u32 origAddr_ = 0;
u32 origSize_ = 0;
MIPSOpcode origFirstOpcode_ = MIPSOpcode(0x68FFFFFF);
int targetOffset_ = -1;
u16 numInstructions_ = 0;
};
class IRBlockCache : public JitBlockCacheDebugInterface {
@ -118,6 +126,7 @@ public:
}
int FindPreloadBlock(u32 em_address);
int FindByCookie(int cookie);
std::vector<u32> SaveAndClearEmuHackOps();
void RestoreSavedEmuHackOps(std::vector<u32> saved);
@ -172,6 +181,7 @@ public:
protected:
virtual bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);
virtual bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) { return true; }
JitOptions jo;

View File

@ -131,7 +131,7 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
// Fixed registers, these are always kept when in Jit context.
LI(MEMBASEREG, Memory::base, SCRATCH1);
LI(CTXREG, mips_, SCRATCH1);
LI(JITBASEREG, blockStartAddrs_, SCRATCH1);
LI(JITBASEREG, GetBasePtr(), SCRATCH1);
LoadStaticRegisters();
MovFromPC(SCRATCH1);
@ -183,35 +183,11 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
// We're in other words comparing to the top 8 bits of MIPS_EMUHACK_OPCODE by subtracting.
ADDI(SCRATCH2, SCRATCH2, -(MIPS_EMUHACK_OPCODE >> 24));
FixupBranch needsCompile = BNE(SCRATCH2, R_ZERO);
// Use a wall to mask by 0x00FFFFFF and extract the block number.
// Use a wall to mask by 0x00FFFFFF and extract the block jit offset.
SLLI(SCRATCH1, SCRATCH1, XLEN - 24);
// But actually, we want * 8, so skip shifting back just a bit.
_assert_msg_(sizeof(blockStartAddrs_[0]) == 8, "RiscVAsm currently assumes pointers are 64-bit");
SRLI(SCRATCH1, SCRATCH1, XLEN - 24 - 3);
if (enableDebug) {
// Let's do some extra validation of the block number in debug mode for testing.
LI(SCRATCH2, MAX_ALLOWED_JIT_BLOCKS * 8);
FixupBranch highBlockNum = BGEU(SCRATCH1, SCRATCH2);
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
// TODO: Consider replacing the block nums after all, just trying to use IR block cache.
LD(SCRATCH1, SCRATCH1, 0);
LI(SCRATCH2, 2);
FixupBranch invalidBlockNum = BEQ(SCRATCH1, R_ZERO);
JR(SCRATCH1);
SetJumpTarget(highBlockNum);
LI(SCRATCH2, 1);
SetJumpTarget(invalidBlockNum);
MV(X10, SCRATCH2);
QuickCallFunction(&ShowBlockError);
} else {
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
// TODO: Consider replacing the block nums after all, just trying to use IR block cache.
LD(SCRATCH1, SCRATCH1, 0);
JR(SCRATCH1);
}
SRLI(SCRATCH1, SCRATCH1, XLEN - 24);
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
JR(SCRATCH1);
SetJumpTarget(needsCompile);
// No block found, let's jit. We don't need to save static regs, they're all callee saved.

View File

@ -32,13 +32,11 @@ RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo)
jo.enablePointerify = false;
}
// Since we store the offset, this is as big as it can be.
// We could shift off one bit to double it, would need to change RiscVAsm.
AllocCodeSpace(1024 * 1024 * 16);
SetAutoCompress(true);
// TODO: Consider replacing block num method from IRJit - this is 2MB.
blockStartAddrs_ = new const u8 *[MAX_ALLOWED_JIT_BLOCKS];
memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);
gpr.Init(this);
fpr.Init(this);
@ -46,7 +44,6 @@ RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo)
}
RiscVJit::~RiscVJit() {
delete [] blockStartAddrs_;
}
void RiscVJit::RunLoopUntil(u64 globalticks) {
@ -54,34 +51,20 @@ void RiscVJit::RunLoopUntil(u64 globalticks) {
((void (*)())enterDispatcher_)();
}
bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) {
// Check that we're not full (we allow fewer blocks than IR itself.)
if (blocks_.GetNumBlocks() >= MAX_ALLOWED_JIT_BLOCKS - 1)
bool RiscVJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
if (GetSpaceLeft() < 0x800)
return false;
if (!IRJit::CompileBlock(em_address, instructions, mipsBytes, preload))
return false;
// Don't worry, the codespace isn't large enough to overflow offsets.
block->SetTargetOffset((int)GetOffset(GetCodePointer()));
// TODO: Block linking, checked entries and such.
int block_num;
if (preload) {
block_num = blocks_.GetBlockNumberFromStartAddress(em_address);
} else {
u32 first_inst = Memory::ReadUnchecked_U32(em_address);
_assert_msg_(MIPS_IS_RUNBLOCK(first_inst), "Should've written an emuhack");
block_num = first_inst & MIPS_EMUHACK_VALUE_MASK;
}
_assert_msg_(block_num >= 0 && block_num < MAX_ALLOWED_JIT_BLOCKS, "Bad block num");
_assert_msg_(blockStartAddrs_[block_num] == nullptr, "Block %d reused before clear", block_num);
blockStartAddrs_[block_num] = GetCodePointer();
gpr.Start();
fpr.Start();
for (const IRInst &inst : instructions) {
for (int i = 0; i < block->GetNumInstructions(); ++i) {
const IRInst &inst = block->GetInstructions()[i];
CompileIRInst(inst);
if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
@ -442,17 +425,14 @@ bool RiscVJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
} else if (!IsInSpace(ptr)) {
return false;
} else {
uintptr_t uptr = (uintptr_t)ptr;
int offset = (int)GetOffset(ptr);
int block_num = -1;
for (int i = 0; i < MAX_ALLOWED_JIT_BLOCKS; ++i) {
uintptr_t blockptr = (uintptr_t)blockStartAddrs_[i];
// Out of allocated blocks.
if (uptr == 0)
break;
if (uptr >= blockptr)
for (int i = 0; i < blocks_.GetNumBlocks(); ++i) {
const auto &b = blocks_.GetBlock(i);
// We allocate linearly.
if (b->GetTargetOffset() <= offset)
block_num = i;
if (uptr < blockptr)
if (b->GetTargetOffset() > offset)
break;
}
@ -494,8 +474,6 @@ void RiscVJit::ClearCache() {
ClearCodeSpace(jitStartOffset_);
FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_);
memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);
}
void RiscVJit::RestoreRoundingMode(bool force) {

View File

@ -46,7 +46,7 @@ public:
// TODO: GetBlockCacheDebugInterface, block linking?
protected:
bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) override;
bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) override;
void CompileIRInst(IRInst inst);
@ -115,8 +115,6 @@ private:
RiscVRegCache gpr;
RiscVRegCacheFPU fpr;
static constexpr int MAX_ALLOWED_JIT_BLOCKS = 262144;
const u8 *enterDispatcher_ = nullptr;
const u8 *outerLoop_ = nullptr;
@ -134,7 +132,6 @@ private:
const u8 *crashHandler_ = nullptr;
int jitStartOffset_ = 0;
const u8 **blockStartAddrs_ = nullptr;
};
} // namespace MIPSComp