riscv: Enable block linking.

This commit is contained in:
Unknown W. Brackets 2023-08-12 09:07:43 -07:00
parent 247788806a
commit fcc90095f7
6 changed files with 242 additions and 52 deletions

View File

@ -97,6 +97,8 @@ uint32_t IRNativeBackend::DoIRInst(uint64_t value) {
return IRInterpret(currentMIPS, &inst, 1);
}
IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}
void IRNativeBackend::CompileIRInst(IRInst inst) {
switch (inst.op) {
case IROp::Nop:
@ -393,7 +395,7 @@ IRNativeJit::IRNativeJit(MIPSState *mipsState)
void IRNativeJit::Init(IRNativeBackend &backend) {
backend_ = &backend;
debugInterface_.Init(&backend_->CodeBlock());
debugInterface_.Init(backend_);
backend_->GenerateFixedCode(mips_);
// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
@ -405,7 +407,7 @@ bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload
}
void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
backend_->FinalizeBlock(block, block_num);
backend_->FinalizeBlock(block, block_num, jo);
}
void IRNativeJit::RunLoopUntil(u64 globalticks) {
@ -426,9 +428,15 @@ void IRNativeJit::InvalidateCacheAt(u32 em_address, int length) {
std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
for (int block_num : numbers) {
auto block = blocks_.GetBlock(block_num);
backend_->InvalidateBlock(block, block_num);
if (em_address != 0 || length < 0x1FFFFFFF) {
backend_->InvalidateBlock(block, block_num);
}
block->Destroy(block->GetTargetOffset());
}
if (em_address == 0 && length >= 0x1FFFFFFF) {
backend_->ClearAllBlocks();
}
}
bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
@ -514,13 +522,73 @@ int IRNativeBackend::OffsetFromCodePtr(const u8 *ptr) {
return (int)codeBlock.GetOffset(ptr);
}
} // namespace MIPSComp
// Performs block linking once a block's native code is finalized: patches other
// blocks' recorded exits to jump straight to this block, and patches this
// block's own exits to jump straight to any already-compiled destinations.
void IRNativeBackend::FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo) {
	if (jo.enableBlocklink) {
		uint32_t pc = block->GetOriginalStart();
		// First, link other blocks to this one now that it's finalized.
		// linksTo_ is keyed by destination PC; the mapped value is the source block number.
		auto incoming = linksTo_.equal_range(pc);
		for (auto it = incoming.first; it != incoming.second; ++it) {
			auto &exits = nativeBlocks_[it->second].exits;
			for (auto &blockExit : exits) {
				// A source block may have several exits; only patch the ones targeting pc.
				if (blockExit.dest == pc)
					OverwriteExit(blockExit.offset, blockExit.len, block_num);
			}
		}
		// And also link this block's exits to other blocks, in case we're finalizing it later.
		auto &outgoing = nativeBlocks_[block_num].exits;
		for (auto &blockExit : outgoing) {
			int dstBlockNum = blocks_.GetBlockNumberFromStartAddress(blockExit.dest);
			const IRNativeBlock *nativeBlock = GetNativeBlock(dstBlockNum);
			// Only patch if the destination has native metadata already; otherwise
			// it will be patched when the destination itself is finalized (above).
			if (nativeBlock)
				OverwriteExit(blockExit.offset, blockExit.len, dstBlockNum);
		}
	}
}
// Returns native-code metadata for a block number, or nullptr when the
// number is out of range (including the -1 "not found" sentinel).
const IRNativeBlock *IRNativeBackend::GetNativeBlock(int block_num) const {
	const bool inRange = block_num >= 0 && block_num < (int)nativeBlocks_.size();
	return inRange ? &nativeBlocks_[block_num] : nullptr;
}
// Records the code offset of a block's checked entry point, growing the
// metadata vector on demand so the index is always valid afterwards.
void IRNativeBackend::SetBlockCheckedOffset(int block_num, int offset) {
	const int knownBlocks = (int)nativeBlocks_.size();
	if (block_num >= knownBlocks)
		nativeBlocks_.resize(block_num + 1);
	nativeBlocks_[block_num].checkedOffset = offset;
}
// Records a linkable exit stub of exitLen bytes at exitStartOffset in
// block_num's native code, targeting guest address pc.
//
// Fix: linksTo_ must be keyed by the destination PC — FinalizeBlock looks
// links up via linksTo_.equal_range(pc) and indexes nativeBlocks_ with the
// mapped value — so the entry is (pc, block_num), not (block_num, pc) as
// previously inserted, which made incoming links never resolve.
void IRNativeBackend::AddLinkableExit(int block_num, uint32_t pc, int exitStartOffset, int exitLen) {
	linksTo_.emplace(pc, block_num);

	if (block_num >= (int)nativeBlocks_.size())
		nativeBlocks_.resize(block_num + 1);
	IRNativeBlockExit blockExit;
	blockExit.offset = exitStartOffset;
	blockExit.len = exitLen;
	blockExit.dest = pc;
	nativeBlocks_[block_num].exits.push_back(blockExit);
}
// Drops link records for one block, or for everything when block_num == -1.
void IRNativeBackend::EraseAllLinks(int block_num) {
	if (block_num == -1) {
		linksTo_.clear();
		nativeBlocks_.clear();
	} else {
		// linksTo_ is keyed by destination PC with block numbers as values
		// (see FinalizeBlock), so erase by value.  The previous
		// linksTo_.erase(block_num) erased by key, treating the block number
		// as a PC and removing unrelated (or no) entries.
		for (auto it = linksTo_.begin(); it != linksTo_.end(); ) {
			if (it->second == block_num)
				it = linksTo_.erase(it);
			else
				++it;
		}
		if (block_num < (int)nativeBlocks_.size())
			nativeBlocks_[block_num].exits.clear();
	}
}
// Debugger view over the IR block cache; the native backend (and its code
// block) are attached later via Init().
IRNativeBlockCacheDebugInterface::IRNativeBlockCacheDebugInterface(const IRBlockCache &irBlocks)
	: irBlocks_(irBlocks) {}
void IRNativeBlockCacheDebugInterface::Init(const CodeBlockCommon *codeBlock) {
codeBlock_ = codeBlock;
// Attaches the backend and caches its code block pointer for offset lookups.
// Pointers are non-owning; backend must outlive this interface.
void IRNativeBlockCacheDebugInterface::Init(const IRNativeBackend *backend) {
	codeBlock_ = &backend->CodeBlock();
	backend_ = backend;
}
int IRNativeBlockCacheDebugInterface::GetNumBlocks() const {
@ -533,14 +601,18 @@ int IRNativeBlockCacheDebugInterface::GetBlockNumberFromStartAddress(u32 em_addr
void IRNativeBlockCacheDebugInterface::GetBlockCodeRange(int blockNum, int *startOffset, int *size) const {
int blockOffset = irBlocks_.GetBlock(blockNum)->GetTargetOffset();
int endOffset;
// We assume linear allocation. Maybe a bit dangerous, should always be right.
if (blockNum + 1 >= GetNumBlocks()) {
// Last block, get from current code pointer.
endOffset = (int)codeBlock_->GetOffset(codeBlock_->GetCodePtr());
} else {
endOffset = irBlocks_.GetBlock(blockNum + 1)->GetTargetOffset();
_assert_msg_(endOffset >= blockOffset, "Next block not sequential, block=%d/%08x, next=%d/%08x", blockNum, blockOffset, blockNum + 1, endOffset);
int endOffset = backend_->GetNativeBlock(blockNum)->checkedOffset;
// If endOffset is before, the checked entry is before the block start.
if (endOffset < blockOffset) {
// We assume linear allocation. Maybe a bit dangerous, should always be right.
if (blockNum + 1 >= GetNumBlocks()) {
// Last block, get from current code pointer.
endOffset = (int)codeBlock_->GetOffset(codeBlock_->GetCodePtr());
} else {
endOffset = irBlocks_.GetBlock(blockNum + 1)->GetTargetOffset();
_assert_msg_(endOffset >= blockOffset, "Next block not sequential, block=%d/%08x, next=%d/%08x", blockNum, blockOffset, blockNum + 1, endOffset);
}
}
*startOffset = blockOffset;
@ -553,7 +625,6 @@ JitBlockDebugInfo IRNativeBlockCacheDebugInterface::GetBlockDebugInfo(int blockN
int blockOffset, codeSize;
GetBlockCodeRange(blockNum, &blockOffset, &codeSize);
// TODO: Normal entry?
const u8 *blockStart = codeBlock_->GetBasePtr() + blockOffset;
#if PPSSPP_ARCH(ARM)
debugInfo.targetDisasm = DisassembleArm2(blockStart, codeSize);
@ -602,3 +673,5 @@ void IRNativeBlockCacheDebugInterface::ComputeStats(BlockCacheStats &bcStats) co
bcStats.maxBloat = (float)maxBloat;
bcStats.avgBloat = (float)(totalBloat / (double)numBlocks);
}
} // namespace MIPSComp

View File

@ -15,25 +15,10 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <unordered_map>
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
class IRNativeBlockCacheDebugInterface : public JitBlockCacheDebugInterface {
public:
IRNativeBlockCacheDebugInterface(const MIPSComp::IRBlockCache &irBlocks);
void Init(const CodeBlockCommon *codeBlock);
int GetNumBlocks() const;
int GetBlockNumberFromStartAddress(u32 em_address, bool realBlocksOnly = true) const;
JitBlockDebugInfo GetBlockDebugInfo(int blockNum) const;
void ComputeStats(BlockCacheStats &bcStats) const;
private:
void GetBlockCodeRange(int blockNum, int *startOffset, int *size) const;
const MIPSComp::IRBlockCache &irBlocks_;
const CodeBlockCommon *codeBlock_ = nullptr;
};
namespace MIPSComp {
typedef void (*IRNativeFuncNoArg)();
@ -46,8 +31,20 @@ struct IRNativeHooks {
const uint8_t *crashHandler = nullptr;
};
// One linkable exit emitted in a block's native code: a patchable jump stub.
struct IRNativeBlockExit {
	// Offset of the stub within the native code block.
	int offset;
	// Reserved length of the stub in bytes, so it can be overwritten later.
	int len;
	// Guest PC this exit branches to.
	uint32_t dest;
};
// Per-block native-code metadata used for block linking.
struct IRNativeBlock {
	// Code offset of the block's checked entry point; 0 when not yet recorded.
	int checkedOffset = 0;
	// All linkable exits emitted for this block.
	std::vector<IRNativeBlockExit> exits;
};
class IRNativeBackend {
public:
IRNativeBackend(IRBlockCache &blocks);
virtual ~IRNativeBackend() {}
void CompileIRInst(IRInst inst);
@ -58,14 +55,17 @@ public:
virtual void GenerateFixedCode(MIPSState *mipsState) = 0;
virtual bool CompileBlock(IRBlock *block, int block_num, bool preload) = 0;
virtual void FinalizeBlock(IRBlock *block, int block_num) = 0;
virtual void ClearAllBlocks() = 0;
virtual void InvalidateBlock(IRBlock *block, int block_num) = 0;
void FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo);
const IRNativeHooks &GetNativeHooks() const {
return hooks_;
}
const IRNativeBlock *GetNativeBlock(int block_num) const;
void SetBlockCheckedOffset(int block_num, int offset);
virtual const CodeBlockCommon &CodeBlock() const = 0;
protected:
@ -112,6 +112,8 @@ protected:
virtual void CompIR_VecStore(IRInst inst) = 0;
virtual void CompIR_ValidateAddress(IRInst inst) = 0;
virtual void OverwriteExit(int srcOffset, int len, int block_num) = 0;
// Returns true when debugging statistics should be compiled in.
bool DebugStatsEnabled() const;
@ -125,7 +127,30 @@ protected:
// Callback to log AND perform an IR interpreter inst. Returns 0 or a PC to jump to.
static uint32_t DoIRInst(uint64_t inst);
void AddLinkableExit(int block_num, uint32_t pc, int exitStartOffset, int exitLen);
void EraseAllLinks(int block_num);
IRNativeHooks hooks_;
IRBlockCache &blocks_;
std::vector<IRNativeBlock> nativeBlocks_;
std::unordered_multimap<uint32_t, int> linksTo_;
};
// Debugger-facing view of natively-compiled IR blocks (disassembly, stats).
class IRNativeBlockCacheDebugInterface : public JitBlockCacheDebugInterface {
public:
	IRNativeBlockCacheDebugInterface(const MIPSComp::IRBlockCache &irBlocks);
	// Attaches the backend; also caches its code block for offset translation.
	void Init(const IRNativeBackend *backend);
	int GetNumBlocks() const;
	int GetBlockNumberFromStartAddress(u32 em_address, bool realBlocksOnly = true) const;
	JitBlockDebugInfo GetBlockDebugInfo(int blockNum) const;
	void ComputeStats(BlockCacheStats &bcStats) const;

private:
	// Computes the native code range [startOffset, startOffset + size) for a block.
	void GetBlockCodeRange(int blockNum, int *startOffset, int *size) const;

	const MIPSComp::IRBlockCache &irBlocks_;
	// Non-owning; both set in Init() and assumed to outlive this interface.
	const CodeBlockCommon *codeBlock_ = nullptr;
	const IRNativeBackend *backend_ = nullptr;
};
class IRNativeJit : public IRJit {

View File

@ -64,6 +64,10 @@ namespace MIPSComp {
useStaticAlloc = !Disabled(JitDisable::STATIC_ALLOC);
// iOS/etc. may disable at runtime if Memory::base is not nicely aligned.
enablePointerify = !Disabled(JitDisable::POINTERIFY);
#endif
#if PPSSPP_ARCH(RISCV64)
// Seems to perform slightly better than a checked entry at the start.
useBackJump = true;
#endif
}

View File

@ -41,8 +41,7 @@ void RiscVJitBackend::CompIR_Exit(IRInst inst) {
switch (inst.op) {
case IROp::ExitToConst:
FlushAll();
LI(SCRATCH1, inst.constant);
QuickJ(R_RA, dispatcherPCInSCRATCH1_);
WriteConstExit(inst.constant);
break;
case IROp::ExitToReg:
@ -92,8 +91,7 @@ void RiscVJitBackend::CompIR_ExitIf(IRInst inst) {
break;
}
LI(SCRATCH1, inst.constant);
QuickJ(R_RA, dispatcherPCInSCRATCH1_);
WriteConstExit(inst.constant);
SetJumpTarget(fixup);
break;
@ -127,8 +125,7 @@ void RiscVJitBackend::CompIR_ExitIf(IRInst inst) {
break;
}
LI(SCRATCH1, inst.constant);
QuickJ(R_RA, dispatcherPCInSCRATCH1_);
WriteConstExit(inst.constant);
SetJumpTarget(fixup);
break;

View File

@ -25,8 +25,12 @@ namespace MIPSComp {
using namespace RiscVGen;
using namespace RiscVJitConstants;
RiscVJitBackend::RiscVJitBackend(MIPSState *mipsState, JitOptions &jitopt)
: jo(jitopt), gpr(mipsState, &jo), fpr(mipsState, &jo) {
// Needs space for a LI and J which might both be 32-bit offsets.
static constexpr int MIN_BLOCK_NORMAL_LEN = 16;
static constexpr int MIN_BLOCK_EXIT_LEN = 8;
RiscVJitBackend::RiscVJitBackend(MIPSState *mipsState, JitOptions &jitopt, IRBlockCache &blocks)
: IRNativeBackend(blocks), jo(jitopt), gpr(mipsState, &jo), fpr(mipsState, &jo) {
// Automatically disable incompatible options.
if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
jo.enablePointerify = false;
@ -52,10 +56,23 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
if (GetSpaceLeft() < 0x800)
return false;
// Don't worry, the codespace isn't large enough to overflow offsets.
block->SetTargetOffset((int)GetOffset(GetCodePointer()));
u32 startPC = block->GetOriginalStart();
bool wroteCheckedOffset = false;
FixupBranch lateCheckFail;
if (jo.enableBlocklink && !jo.useBackJump) {
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
// TODO: Block linking, checked entries and such.
FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
LI(SCRATCH1, startPC);
QuickJ(R_RA, outerLoopPCInSCRATCH1_);
SetJumpTarget(normalEntry);
}
// Don't worry, the codespace isn't large enough to overflow offsets.
const u8 *blockStart = GetCodePointer();
block->SetTargetOffset((int)GetOffset(blockStart));
compilingBlockNum_ = block_num;
gpr.Start(block);
fpr.Start(block);
@ -74,6 +91,7 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800) {
compilingBlockNum_ = -1;
return false;
}
}
@ -86,18 +104,82 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}
int len = (int)GetOffset(GetCodePointer()) - block->GetTargetOffset();
if (len < 16) {
if (len < MIN_BLOCK_NORMAL_LEN) {
// We need at least 16 bytes to invalidate blocks with, but larger doesn't need to align.
AlignCode16();
ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - len);
}
if (!wroteCheckedOffset) {
// Always record this, even if block link disabled - it's used for size calc.
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
}
if (jo.enableBlocklink && jo.useBackJump) {
// Most blocks shouldn't be >= 4KB, so usually we can just BGE.
if (BInRange(blockStart)) {
BGE(DOWNCOUNTREG, R_ZERO, blockStart);
} else {
FixupBranch skip = BLT(DOWNCOUNTREG, R_ZERO);
J(blockStart);
SetJumpTarget(skip);
}
LI(SCRATCH1, startPC);
QuickJ(R_RA, outerLoopPCInSCRATCH1_);
}
FlushIcache();
compilingBlockNum_ = -1;
return true;
}
void RiscVJitBackend::FinalizeBlock(IRBlock *block, int block_num) {
// TODO
// Emits an exit to a constant guest PC.  When linking is enabled and the
// target block is already compiled, jumps directly to its checked entry;
// otherwise goes through the dispatcher with the PC loaded in SCRATCH1.
void RiscVJitBackend::WriteConstExit(uint32_t pc) {
	int block_num = blocks_.GetBlockNumberFromStartAddress(pc);
	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);
	int exitStart = (int)GetOffset(GetCodePointer());
	// checkedOffset != 0 is used as "target has a usable checked entry".
	if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) {
		// Don't bother recording, we don't ever overwrite to "unlink".
		// Instead, we would mark the target block to jump to the dispatcher.
		QuickJ(SCRATCH1, GetBasePtr() + nativeBlock->checkedOffset);
	} else {
		LI(SCRATCH1, pc);
		QuickJ(R_RA, dispatcherPCInSCRATCH1_);
	}
	if (jo.enableBlocklink) {
		// In case of compression or early link, make sure it's large enough.
		// The exit must stay at least MIN_BLOCK_EXIT_LEN so it can be patched later.
		int len = (int)GetOffset(GetCodePointer()) - exitStart;
		if (len < MIN_BLOCK_EXIT_LEN) {
			ReserveCodeSpace(MIN_BLOCK_EXIT_LEN - len);
			len = MIN_BLOCK_EXIT_LEN;
		}
		AddLinkableExit(compilingBlockNum_, pc, exitStart, len);
	}
}
// Patches a previously-reserved exit stub (len bytes at srcOffset) to jump
// directly to block_num's checked entry.  Does nothing when block_num has no
// native metadata.
void RiscVJitBackend::OverwriteExit(int srcOffset, int len, int block_num) {
	_dbg_assert_(len >= MIN_BLOCK_EXIT_LEN);

	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);
	if (nativeBlock) {
		u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + srcOffset;
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		RiscVEmitter emitter(GetBasePtr() + srcOffset, writable);
		emitter.QuickJ(SCRATCH1, GetBasePtr() + nativeBlock->checkedOffset);
		// Pad the remainder of the reserved stub so no stale bytes execute.
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		if (bytesWritten < len)
			emitter.ReserveCodeSpace(len - bytesWritten);
		emitter.FlushIcache();

		if (PlatformIsWXExclusive()) {
			// Fix: restore protection over the same len-byte range that was made
			// writable above.  The previous hardcoded 16 could leave pages
			// writable+non-executable when len exceeded 16.
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}
}
void RiscVJitBackend::CompIR_Generic(IRInst inst) {
@ -166,6 +248,7 @@ bool RiscVJitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
// Wipes all generated block code — keeping the fixed code emitted before
// jitStartOffset_ — and forgets every recorded block link.
void RiscVJitBackend::ClearAllBlocks() {
	ClearCodeSpace(jitStartOffset_);
	FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_);
	EraseAllLinks(-1);
}
void RiscVJitBackend::InvalidateBlock(IRBlock *block, int block_num) {
@ -182,15 +265,20 @@ void RiscVJitBackend::InvalidateBlock(IRBlock *block, int block_num) {
RiscVEmitter emitter(GetBasePtr() + offset, writable);
// We sign extend to ensure it will fit in 32-bit and 8 bytes LI.
// TODO: Would need to change if dispatcher doesn't reload PC.
// TODO: May need to change if dispatcher doesn't reload PC.
emitter.LI(SCRATCH1, (int32_t)pc);
emitter.J(dispatcherPCInSCRATCH1_);
emitter.QuickJ(R_RA, dispatcherPCInSCRATCH1_);
int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
if (bytesWritten < MIN_BLOCK_NORMAL_LEN)
emitter.ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - bytesWritten);
emitter.FlushIcache();
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(writable, 16, MEM_PROT_READ | MEM_PROT_EXEC);
}
}
EraseAllLinks(block_num);
}
void RiscVJitBackend::RestoreRoundingMode(bool force) {

View File

@ -31,14 +31,13 @@ namespace MIPSComp {
class RiscVJitBackend : public RiscVGen::RiscVCodeBlock, public IRNativeBackend {
public:
RiscVJitBackend(MIPSState *mipsState, JitOptions &jo);
RiscVJitBackend(MIPSState *mipsState, JitOptions &jo, IRBlockCache &blocks);
~RiscVJitBackend();
bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;
void GenerateFixedCode(MIPSState *mipsState) override;
bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
void FinalizeBlock(IRBlock *block, int block_num) override;
void ClearAllBlocks() override;
void InvalidateBlock(IRBlock *block, int block_num) override;
@ -59,6 +58,9 @@ private:
// Note: destroys SCRATCH1.
void FlushAll();
void WriteConstExit(uint32_t pc);
void OverwriteExit(int srcOffset, int len, int block_num) override;
void CompIR_Arith(IRInst inst) override;
void CompIR_Assign(IRInst inst) override;
void CompIR_Basic(IRInst inst) override;
@ -124,12 +126,13 @@ private:
const u8 *loadStaticRegisters_ = nullptr;
int jitStartOffset_ = 0;
int compilingBlockNum_ = -1;
};
class RiscVJit : public IRNativeJit {
public:
RiscVJit(MIPSState *mipsState)
: IRNativeJit(mipsState), rvBackend_(mipsState, jo) {
: IRNativeJit(mipsState), rvBackend_(mipsState, jo, blocks_) {
Init(rvBackend_);
}