diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d2e98ac9c..055935e731 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1609,6 +1609,19 @@ list(APPEND CoreExtra ) list(APPEND CoreExtra + Core/MIPS/RiscV/RiscVAsm.cpp + Core/MIPS/RiscV/RiscVCompALU.cpp + Core/MIPS/RiscV/RiscVCompBranch.cpp + Core/MIPS/RiscV/RiscVCompFPU.cpp + Core/MIPS/RiscV/RiscVCompLoadStore.cpp + Core/MIPS/RiscV/RiscVCompSystem.cpp + Core/MIPS/RiscV/RiscVCompVec.cpp + Core/MIPS/RiscV/RiscVJit.cpp + Core/MIPS/RiscV/RiscVJit.h + Core/MIPS/RiscV/RiscVRegCache.cpp + Core/MIPS/RiscV/RiscVRegCache.h + Core/MIPS/RiscV/RiscVRegCacheFPU.cpp + Core/MIPS/RiscV/RiscVRegCacheFPU.h GPU/Common/VertexDecoderRiscV.cpp ) diff --git a/Common/RiscVEmitter.cpp b/Common/RiscVEmitter.cpp index 7b201c0105..0029da5287 100644 --- a/Common/RiscVEmitter.cpp +++ b/Common/RiscVEmitter.cpp @@ -1180,6 +1180,19 @@ bool RiscVEmitter::CJInRange(const void *src, const void *dst) const { return BJInRange(src, dst, 12); } +void RiscVEmitter::QuickJAL(RiscVReg scratchreg, RiscVReg rd, const u8 *dst) { + if (!JInRange(GetCodePointer(), dst)) { + static_assert(sizeof(intptr_t) <= sizeof(int64_t)); + int64_t pcdelta = (int64_t)dst - (int64_t)GetCodePointer(); + int32_t lower = (int32_t)SignReduce64(pcdelta, 12); + uintptr_t upper = ((pcdelta - lower) >> 12) << 12; + LI(scratchreg, (uintptr_t)GetCodePointer() + upper); + JALR(rd, scratchreg, lower); + } else { + JAL(rd, dst); + } +} + void RiscVEmitter::SetRegToImmediate(RiscVReg rd, uint64_t value, RiscVReg temp) { int64_t svalue = (int64_t)value; _assert_msg_(IsGPR(rd) && IsGPR(temp), "SetRegToImmediate only supports GPRs"); diff --git a/Common/RiscVEmitter.h b/Common/RiscVEmitter.h index 16ad0176e5..fedb0b0c7f 100644 --- a/Common/RiscVEmitter.h +++ b/Common/RiscVEmitter.h @@ -213,6 +213,19 @@ public: bool BInRange(const void *func) const; bool JInRange(const void *func) const; + void QuickJAL(RiscVReg scratchreg, RiscVReg rd, const u8 *dst); + void QuickJ(RiscVReg scratchreg, const u8 *dst) { + QuickJAL(scratchreg, R_ZERO, dst); + } + void QuickCallFunction(const u8 *func) { + QuickJAL(R_RA, R_RA, func); + } + template + void QuickCallFunction(T *func) { + static_assert(std::is_function::value, "QuickCallFunction without function"); + QuickCallFunction((const u8 *)func); + } + void LUI(RiscVReg rd, s32 simm32); void AUIPC(RiscVReg rd, s32 simm32); diff --git a/Core/Config.cpp b/Core/Config.cpp index 81b41e2646..da1c279033 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -129,7 +129,7 @@ std::string CreateRandMAC() { } static int DefaultCpuCore() { -#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) +#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) || PPSSPP_ARCH(RISCV64) if (System_GetPropertyBool(SYSPROP_CAN_JIT)) return (int)CPUCore::JIT; return (int)CPUCore::IR_JIT; @@ -139,7 +139,7 @@ static int DefaultCpuCore() { } static bool DefaultCodeGen() { -#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) +#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) || PPSSPP_ARCH(RISCV64) return true; #else return false; diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index 5a347d4006..b2ae24174f 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -593,6 +593,16 @@ + + + + + + + + + + @@ -1161,6 +1171,9 @@ + + + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 49c2a10c7b..a4f56f5eb1 100644 --- a/Core/Core.vcxproj.filters +++ 
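// Aside on QuickJAL above: JAL's J-type immediate only reaches +/-1 MiB, so the
// out-of-range path splits the PC-relative delta into a page-aligned "upper" part loaded
// into the scratch register and a sign-extended 12-bit "lower" part supplied by JALR,
// such that upper + lower == delta. A minimal standalone sketch of that arithmetic
// (helper names are illustrative, not part of the patch):
#include <cassert>
#include <cstdint>

static int64_t SignExtend12(int64_t v) {
	int64_t low = v & 0xFFF;
	return low >= 0x800 ? low - 0x1000 : low;   // mirrors what SignReduce64(v, 12) is used for above
}

static void SplitFarOffset(int64_t delta, int64_t &upper, int64_t &lower) {
	lower = SignExtend12(delta);
	upper = delta - lower;                       // low 12 bits cancel, so this is a multiple of 4096
	assert((upper & 0xFFF) == 0);
	assert(upper + lower == delta);              // LI(pc + upper); JALR(rd, scratch, lower) lands on dst
}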
b/Core/Core.vcxproj.filters @@ -91,6 +91,9 @@ {678fa299-0ff7-4983-982d-2da47b52e238} + + {067e3128-3aaf-4ed1-b19e-bab11606abe7} + @@ -1204,6 +1207,36 @@ Core + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + @@ -1950,6 +1983,15 @@ Core + + MIPS\RiscV + + + MIPS\RiscV + + + MIPS\RiscV + diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h index a9c97a85ad..3f81cd0a75 100644 --- a/Core/MIPS/ARM64/Arm64Jit.h +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -213,7 +213,9 @@ private: bool ReplaceJalTo(u32 dest); + // Clobbers SCRATCH2. void SaveStaticRegisters(); + // Clobbers SCRATCH2. void LoadStaticRegisters(); void WriteExit(u32 destination, int exit_num); diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index ac97e33e74..3bfede4dbb 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -777,7 +777,7 @@ void Arm64RegCache::FlushAll() { // Re-pointerify emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32); ar[allocs[i].ar].pointerified = true; - } else { + } else if (!allocs[i].pointerified) { // If this register got pointerified on the way, mark it as not, so that after save/reload (like in an interpreter fallback), it won't be regarded as such, as it simply won't be. ar[allocs[i].ar].pointerified = false; } diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 8c7b67b00a..85549c8d6a 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -283,7 +283,9 @@ enum IRFpCompareMode { LessEqualUnordered, // ule, ngt (less equal, unordered) }; -enum { +typedef u8 IRReg; + +enum : IRReg { IRTEMP_0 = 192, IRTEMP_1, IRTEMP_2, @@ -332,11 +334,11 @@ struct IRMeta { struct IRInst { IROp op; union { - u8 dest; - u8 src3; + IRReg dest; + IRReg src3; }; - u8 src1; - u8 src2; + IRReg src1; + IRReg src2; u32 constant; }; diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index 6a82ec1f7c..ffab92975c 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -262,11 +262,6 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) { Crash(); } -bool IRJit::ReplaceJalTo(u32 dest) { - Crash(); - return false; -} - void IRBlockCache::Clear() { for (int i = 0; i < (int)blocks_.size(); ++i) { blocks_[i].Destroy(i); diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 973ddf524e..756a584ddf 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -170,9 +170,8 @@ public: void LinkBlock(u8 *exitPoint, const u8 *checkedEntry) override; void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) override; -private: - bool CompileBlock(u32 em_address, std::vector &instructions, u32 &mipsBytes, bool preload); - bool ReplaceJalTo(u32 dest); +protected: + virtual bool CompileBlock(u32 em_address, std::vector &instructions, u32 &mipsBytes, bool preload); JitOptions jo; @@ -187,4 +186,3 @@ private: }; } // namespace MIPSComp - diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index ca54392be1..d731c51a74 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -2,7 +2,7 @@ #include "Core/MIPS/IR/IRRegCache.h" #include "Core/MIPS/IR/IRInst.h" -void IRRegCache::Flush(int rd) { +void IRRegCache::Flush(IRReg rd) { if (rd == 0) { return; } @@ -12,7 +12,7 @@ void IRRegCache::Flush(int rd) { } } -void IRRegCache::Discard(int rd) { +void IRRegCache::Discard(IRReg rd) { if (rd == 0) { return; } @@ -31,33 +31,33 @@ 
void IRRegCache::FlushAll() { } } -void IRRegCache::MapIn(int rd) { +void IRRegCache::MapIn(IRReg rd) { Flush(rd); } -void IRRegCache::MapDirty(int rd) { +void IRRegCache::MapDirty(IRReg rd) { Discard(rd); } -void IRRegCache::MapInIn(int rs, int rt) { +void IRRegCache::MapInIn(IRReg rs, IRReg rt) { Flush(rs); Flush(rt); } -void IRRegCache::MapInInIn(int rd, int rs, int rt) { +void IRRegCache::MapInInIn(IRReg rd, IRReg rs, IRReg rt) { Flush(rd); Flush(rs); Flush(rt); } -void IRRegCache::MapDirtyIn(int rd, int rs) { +void IRRegCache::MapDirtyIn(IRReg rd, IRReg rs) { if (rs != rd) { Discard(rd); } Flush(rs); } -void IRRegCache::MapDirtyInIn(int rd, int rs, int rt) { +void IRRegCache::MapDirtyInIn(IRReg rd, IRReg rs, IRReg rt) { if (rs != rd && rt != rd) { Discard(rd); } diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h index 9fcdab8b1e..80fad1d6ef 100644 --- a/Core/MIPS/IR/IRRegCache.h +++ b/Core/MIPS/IR/IRRegCache.h @@ -5,6 +5,7 @@ #include "Common/CommonTypes.h" #include "Core/MIPS/MIPS.h" +#include "Core/MIPS/IR/IRInst.h" enum { TOTAL_MAPPABLE_MIPSREGS = 256, @@ -22,26 +23,26 @@ class IRRegCache { public: IRRegCache(IRWriter *ir); - void SetImm(int r, u32 immVal) { + void SetImm(IRReg r, u32 immVal) { reg_[r].isImm = true; reg_[r].immVal = immVal; } - bool IsImm(int r) const { return reg_[r].isImm; } - u32 GetImm(int r) const { return reg_[r].immVal; } + bool IsImm(IRReg r) const { return reg_[r].isImm; } + u32 GetImm(IRReg r) const { return reg_[r].immVal; } void FlushAll(); - void MapDirty(int rd); - void MapIn(int rd); - void MapInIn(int rs, int rt); - void MapInInIn(int rd, int rs, int rt); - void MapDirtyIn(int rd, int rs); - void MapDirtyInIn(int rd, int rs, int rt); + void MapDirty(IRReg rd); + void MapIn(IRReg rd); + void MapInIn(IRReg rs, IRReg rt); + void MapInInIn(IRReg rd, IRReg rs, IRReg rt); + void MapDirtyIn(IRReg rd, IRReg rs); + void MapDirtyInIn(IRReg rd, IRReg rs, IRReg rt); private: - void Flush(int rd); - void Discard(int rd); + void Flush(IRReg rd); + void Discard(IRReg rd); RegIR reg_[TOTAL_MAPPABLE_MIPSREGS]; IRWriter *ir_; }; diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 20a8fb6e74..4156732d5d 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -45,6 +45,8 @@ #include "../x86/Jit.h" #elif PPSSPP_ARCH(MIPS) #include "../MIPS/MipsJit.h" +#elif PPSSPP_ARCH(RISCV64) +#include "../RiscV/RiscVJit.h" #else #include "../fake/FakeJit.h" #endif @@ -108,6 +110,8 @@ namespace MIPSComp { return new MIPSComp::Jit(mipsState); #elif PPSSPP_ARCH(MIPS) return new MIPSComp::MipsJit(mipsState); +#elif PPSSPP_ARCH(RISCV64) + return new MIPSComp::RiscVJit(mipsState); #else return new MIPSComp::FakeJit(mipsState); #endif diff --git a/Core/MIPS/RiscV/RiscVAsm.cpp b/Core/MIPS/RiscV/RiscVAsm.cpp new file mode 100644 index 0000000000..7fe1ce8b53 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVAsm.cpp @@ -0,0 +1,308 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/Log.h" +#include "Core/CoreTiming.h" +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/System.h" + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +static const bool enableDebug = false; +static const bool enableDisasm = false; + +static void ShowPC(u32 downcount, void *membase, void *jitbase) { + static int count = 0; + if (currentMIPS) { + ERROR_LOG(JIT, "[%08x] ShowPC Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase); + } else { + ERROR_LOG(JIT, "Universe corrupt?"); + } + //if (count > 2000) + // exit(0); + count++; +} + +static void ShowBlockError(int type) { + if (type == 1) { + ERROR_LOG(JIT, "[%08x] ShowBlockError: block num was out of range in emuhack", currentMIPS->pc); + } else if (type == 2) { + ERROR_LOG(JIT, "[%08x] ShowBlockError: block num pointed to null jitblock", currentMIPS->pc); + } else { + ERROR_LOG(JIT, "[%08x] ShowBlockError: invalid error type", currentMIPS->pc); + } +} + +void RiscVJit::GenerateFixedCode(const JitOptions &jo) { + BeginWrite(GetMemoryProtectPageSize()); + const u8 *start = AlignCodePage(); + + if (jo.useStaticAlloc) { + saveStaticRegisters_ = AlignCode16(); + SW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + gpr.EmitSaveStaticRegisters(); + RET(); + + loadStaticRegisters_ = AlignCode16(); + gpr.EmitLoadStaticRegisters(); + LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + RET(); + + start = saveStaticRegisters_; + } else { + saveStaticRegisters_ = nullptr; + loadStaticRegisters_ = nullptr; + } + + // TODO: Do we actually need updateRoundingMode_? Hm. + //applyRoundingMode_ = AlignCode16(); + if (false) { + // Not sure if RISC-V has any flush to zero capability? Leaving it off for now... + LWU(SCRATCH2, CTXREG, offsetof(MIPSState, fcr31)); + + // We can skip if the rounding mode is nearest (0) and flush is not set. + // (as restoreRoundingMode cleared it out anyway) + FixupBranch skip = BEQ(SCRATCH2, R_ZERO); + + // MIPS Rounding Mode: RISC-V + // 0: Round nearest 0 + // 1: Round to zero 1 + // 2: Round up (ceil) 3 + // 3: Round down (floor) 2 + if (cpu_info.RiscV_Zbs) { + BEXTI(SCRATCH1, SCRATCH2, 1); + } else { + ANDI(SCRATCH1, SCRATCH2, 2); + SRLI(SCRATCH1, SCRATCH1, 1); + } + // Swap the lowest bit by the second bit. + XOR(SCRATCH2, SCRATCH2, SCRATCH1); + + FSRM(SCRATCH2); + + SetJumpTarget(skip); + RET(); + } + + //updateRoundingMode_ = AlignCode16(); + if (false) { + LWU(SCRATCH2, CTXREG, offsetof(MIPSState, fcr31)); + + // Set SCRATCH2 to FZ:RM (FZ is bit 24, and RM are lowest 2 bits.) + ANDI(SCRATCH1, SCRATCH2, 1 << 24); + ANDI(SCRATCH2, SCRATCH2, 3); + SRLI(SCRATCH1, SCRATCH1, 22); + OR(SCRATCH2, SCRATCH2, SCRATCH1); + + // Let's update js.currentRoundingFunc with the right convertS0ToSCRATCH1 func. + //LI(SCRATCH1, convertS0ToSCRATCH1); + if (cpu_info.RiscV_Zba) { + SH_ADD(3, SCRATCH1, SCRATCH2, SCRATCH1); + } else { + SLLI(SCRATCH2, SCRATCH2, 3); + ADD(SCRATCH1, SCRATCH1, SCRATCH2); + } + LD(SCRATCH2, SCRATCH1, 0); + //LI(SCRATCH1, &js.currentRoundingFunc); + SW(SCRATCH2, SCRATCH1, 0); + RET(); + } + + enterDispatcher_ = AlignCode16(); + + // Start by saving some regs on the stack. 
There are 12 GPs and 12 FPs we want. + // Note: we leave R_SP as, well, SP, so it doesn't need to be saved. + _assert_msg_(cpu_info.Mode64bit, "RiscVAsm currently assumes RV64, not RV32 or RV128"); + static constexpr RiscVReg regs_to_save[]{ R_RA, X8, X9, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27 }; + // TODO: Maybe we shouldn't regalloc all of these? Is it worth it? + static constexpr RiscVReg regs_to_save_fp[]{ F8, F9, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27 }; + int saveSize = 8 * (int)(ARRAY_SIZE(regs_to_save) + ARRAY_SIZE(regs_to_save_fp)); + if (saveSize & 0xF) + saveSize += 8; + _assert_msg_((saveSize & 0xF) == 0, "Stack must be kept aligned"); + int saveOffset = 0; + ADDI(R_SP, R_SP, -saveSize); + for (RiscVReg r : regs_to_save) { + SD(r, R_SP, saveOffset); + saveOffset += 8; + } + for (RiscVReg r : regs_to_save_fp) { + FS(64, r, R_SP, saveOffset); + saveOffset += 8; + } + _assert_(saveOffset <= saveSize); + + // Fixed registers, these are always kept when in Jit context. + LI(MEMBASEREG, Memory::base, SCRATCH1); + LI(CTXREG, mips_, SCRATCH1); + LI(JITBASEREG, blockStartAddrs_, SCRATCH1); + + LoadStaticRegisters(); + MovFromPC(SCRATCH1); + outerLoopPCInSCRATCH1_ = GetCodePtr(); + MovToPC(SCRATCH1); + outerLoop_ = GetCodePtr(); + // Advance can change the downcount (or thread), so must save/restore around it. + SaveStaticRegisters(); + RestoreRoundingMode(true); + QuickCallFunction(&CoreTiming::Advance); + ApplyRoundingMode(true); + LoadStaticRegisters(); + + dispatcherCheckCoreState_ = GetCodePtr(); + LI(SCRATCH1, &coreState, SCRATCH2); + LW(SCRATCH1, SCRATCH1, 0); + FixupBranch badCoreState = BNE(SCRATCH1, R_ZERO); + + // We just checked coreState, so go to advance if downcount is negative. + BLT(DOWNCOUNTREG, R_ZERO, outerLoop_); + FixupBranch skipToRealDispatch = J(); + + dispatcherPCInSCRATCH1_ = GetCodePtr(); + MovToPC(SCRATCH1); + + dispatcher_ = GetCodePtr(); + FixupBranch bail = BLT(DOWNCOUNTREG, R_ZERO); + SetJumpTarget(skipToRealDispatch); + + dispatcherNoCheck_ = GetCodePtr(); + + // Debug + if (enableDebug) { + MV(X10, DOWNCOUNTREG); + MV(X11, MEMBASEREG); + MV(X12, JITBASEREG); + QuickCallFunction(&ShowPC); + } + + LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc)); +#ifdef MASKED_PSP_MEMORY + LI(SCRATCH2, 0x3FFFFFFF); + AND(SCRATCH1, SCRATCH1, SCRATCH2); +#endif + ADD(SCRATCH1, SCRATCH1, MEMBASEREG); + dispatcherFetch_ = GetCodePtr(); + LWU(SCRATCH1, SCRATCH1, 0); + SRLI(SCRATCH2, SCRATCH1, 24); + // We're in other words comparing to the top 8 bits of MIPS_EMUHACK_OPCODE by subtracting. + ADDI(SCRATCH2, SCRATCH2, -(MIPS_EMUHACK_OPCODE >> 24)); + FixupBranch needsCompile = BNE(SCRATCH2, R_ZERO); + // Use a wall to mask by 0x00FFFFFF and extract the block number. + SLLI(SCRATCH1, SCRATCH1, XLEN - 24); + // But actually, we want * 8, so skip shifting back just a bit. + _assert_msg_(sizeof(blockStartAddrs_[0]) == 8, "RiscVAsm currently assumes pointers are 64-bit"); + SRLI(SCRATCH1, SCRATCH1, XLEN - 24 - 3); + if (enableDebug) { + // Let's do some extra validation of the block number in debug mode for testing. + + LI(SCRATCH2, MAX_ALLOWED_JIT_BLOCKS * 8); + FixupBranch highBlockNum = BGEU(SCRATCH1, SCRATCH2); + ADD(SCRATCH1, JITBASEREG, SCRATCH1); + // TODO: Consider replacing the block nums after all, just trying to use IR block cache. 
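// Aside on the dispatcher fetch just above: an emuhack opcode keeps the tag in its top
// 8 bits and the JIT block number in the low 24 bits. The SLLI/SRLI pair masks those 24
// bits and multiplies by 8 (the size of a block pointer) in one pass, producing the byte
// offset into blockStartAddrs_. A plain-C++ model, assuming XLEN == 64 as the surrounding
// code does (function name is illustrative):
#include <cstdint>

static uint64_t BlockTableOffset(uint32_t opcode) {
	uint64_t x = opcode;
	x <<= 64 - 24;       // SLLI: push the 8-bit emuhack tag off the top
	x >>= 64 - 24 - 3;   // SRLI: bring the block number back, already scaled by 8
	return x;            // equals (opcode & 0x00FFFFFF) * sizeof(const u8 *)
}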
+ LD(SCRATCH1, SCRATCH1, 0); + LI(SCRATCH2, 2); + FixupBranch invalidBlockNum = BEQ(SCRATCH1, R_ZERO); + JR(SCRATCH1); + + SetJumpTarget(highBlockNum); + LI(SCRATCH2, 1); + SetJumpTarget(invalidBlockNum); + + MV(X10, SCRATCH2); + QuickCallFunction(&ShowBlockError); + } else { + ADD(SCRATCH1, JITBASEREG, SCRATCH1); + // TODO: Consider replacing the block nums after all, just trying to use IR block cache. + LD(SCRATCH1, SCRATCH1, 0); + JR(SCRATCH1); + } + SetJumpTarget(needsCompile); + + // No block found, let's jit. We don't need to save static regs, they're all callee saved. + RestoreRoundingMode(true); + QuickCallFunction(&MIPSComp::JitAt); + ApplyRoundingMode(true); + + // Try again, the block index should be set now. + J(dispatcherNoCheck_); + + SetJumpTarget(bail); + + LI(SCRATCH1, &coreState, SCRATCH2); + LW(SCRATCH1, SCRATCH1, 0); + BEQ(SCRATCH1, R_ZERO, outerLoop_); + + const uint8_t *quitLoop = GetCodePtr(); + SetJumpTarget(badCoreState); + + SaveStaticRegisters(); + RestoreRoundingMode(true); + + _assert_msg_(cpu_info.Mode64bit, "RiscVAsm currently assumes RV64, not RV32 or RV128"); + saveOffset = 0; + for (RiscVReg r : regs_to_save) { + LD(r, R_SP, saveOffset); + saveOffset += 8; + } + for (RiscVReg r : regs_to_save_fp) { + FL(64, r, R_SP, saveOffset); + saveOffset += 8; + } + ADDI(R_SP, R_SP, saveSize); + + RET(); + + // TODO + crashHandler_ = GetCodePtr(); + LI(SCRATCH1, &coreState, SCRATCH2); + LI(SCRATCH2, CORE_RUNTIME_ERROR); + SW(SCRATCH2, SCRATCH1, 0); + J(quitLoop); + + // TODO: Do we need this? + static const Round roundModes[8] = { Round::NEAREST_EVEN, Round::TOZERO, Round::UP, Round::DOWN, Round::NEAREST_EVEN, Round::TOZERO, Round::UP, Round::DOWN }; + for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) { + //convertS0ToSCRATCH1[i] = AlignCode16(); + + //FCVT(FConv::W, FConv::S, SCRATCH1, F0, roundModes[i]); + //RET(); + } + + // Leave this at the end, add more stuff above. + if (enableDisasm) { + std::vector lines = DisassembleRV64(start, GetCodePtr() - start); + for (auto s : lines) { + INFO_LOG(JIT, "%s", s.c_str()); + } + } + + // Let's spare the pre-generated code from unprotect-reprotect. + AlignCodePage(); + jitStartOffset_ = (int)(GetCodePtr() - start); + // Don't forget to zap the instruction cache! This must stay at the end of this function. + FlushIcache(); + EndWrite(); +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVCompALU.cpp b/Core/MIPS/RiscV/RiscVCompALU.cpp new file mode 100644 index 0000000000..61c0da3a70 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompALU.cpp @@ -0,0 +1,655 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
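// Aside, referring back to the fcr31 handling and the roundModes table in RiscVAsm.cpp
// above: MIPS and RISC-V agree on "nearest" and "toward zero" but swap the encodings of
// round-up and round-down, which is why that code extracts bit 1 and XORs it into bit 0.
// A plain-C++ model of the remapping (function name is illustrative):
#include <cstdint>

static uint32_t MipsRoundingToRiscvFrm(uint32_t fcr31) {
	uint32_t rm = fcr31 & 3;   // MIPS: 0=nearest, 1=to zero, 2=up, 3=down
	return rm ^ (rm >> 1);     // RISC-V frm: 0=RNE, 1=RTZ, 2=RDN, 3=RUP, so 2 and 3 swap
}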
+ +#include "Common/CPUDetect.h" +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for integer / arithmetic / logic related instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::CompIR_Arith(IRInst inst) { + CONDITIONAL_DISABLE; + + bool allowPtrMath = true; +#ifndef MASKED_PSP_MEMORY + // Since we modify it, we can't safely. + allowPtrMath = false; +#endif + + // RISC-V only adds signed immediates, so rewrite a small enough subtract to an add. + // We use -2047 and 2048 here because the range swaps. + if (inst.op == IROp::SubConst && (int32_t)inst.constant >= -2047 && (int32_t)inst.constant <= 2048) { + inst.op = IROp::AddConst; + inst.constant = (uint32_t)-(int32_t)inst.constant; + } + + switch (inst.op) { + case IROp::Add: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + ADDW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + break; + + case IROp::Sub: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + SUBW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + break; + + case IROp::AddConst: + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + // Typical of stack pointer updates. + if (gpr.IsMappedAsPointer(inst.src1) && inst.dest == inst.src1 && allowPtrMath) { + gpr.MarkPtrDirty(gpr.RPtr(inst.dest)); + ADDI(gpr.RPtr(inst.dest), gpr.RPtr(inst.dest), inst.constant); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + ADDIW(gpr.R(inst.dest), gpr.R(inst.src1), inst.constant); + } + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + LI(SCRATCH1, (int32_t)inst.constant); + ADDW(gpr.R(inst.dest), gpr.R(inst.src1), SCRATCH1); + } + break; + + case IROp::SubConst: + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + LI(SCRATCH1, (int32_t)inst.constant); + SUBW(gpr.R(inst.dest), gpr.R(inst.src1), SCRATCH1); + break; + + case IROp::Neg: + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SUBW(gpr.R(inst.dest), R_ZERO, gpr.R(inst.src1)); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Logic(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::And: + if (inst.src1 != inst.src2) { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + AND(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + } else if (inst.src1 != inst.dest) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + break; + + case IROp::Or: + if (inst.src1 != inst.src2) { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + OR(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + // If both were normalized before, the result is normalized. 
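// Aside on the "normalized" tracking used throughout these handlers: a 64-bit host
// register is considered normalized when it holds the sign-extension of the guest's
// 32-bit value, which is exactly what the W-form RV64 instructions (ADDW, ADDIW, SLLIW,
// ...) produce. MarkDirty(reg, true) records that fact so later code (compares, exits)
// can skip re-extending. A small model of the predicate (illustrative, not the register
// cache's actual implementation):
#include <cstdint>

static bool IsNormalized32(uint64_t hostReg) {
	// Bits 32..63 must be copies of bit 31.
	return (int64_t)hostReg == (int64_t)(int32_t)hostReg;
}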
+ if (gpr.IsNormalized32(inst.src1) && gpr.IsNormalized32(inst.src2)) + gpr.MarkDirty(gpr.R(inst.dest), true); + } else if (inst.src1 != inst.dest) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + break; + + case IROp::Xor: + if (inst.src1 == inst.src2) { + gpr.SetImm(inst.dest, 0); + } else { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + XOR(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + } + break; + + case IROp::AndConst: + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + gpr.MapDirtyIn(inst.dest, inst.src1); + ANDI(gpr.R(inst.dest), gpr.R(inst.src1), inst.constant); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + LI(SCRATCH1, (int32_t)inst.constant); + AND(gpr.R(inst.dest), gpr.R(inst.src1), SCRATCH1); + } + // If the sign bits aren't cleared, and it was normalized before - it still is. + if ((inst.constant & 0x80000000) != 0 && gpr.IsNormalized32(inst.src1)) + gpr.MarkDirty(gpr.R(inst.dest), true); + // Otherwise, if we cleared the sign bits, it's naturally normalized. + else if ((inst.constant & 0x80000000) == 0) + gpr.MarkDirty(gpr.R(inst.dest), true); + break; + + case IROp::OrConst: + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + gpr.MapDirtyIn(inst.dest, inst.src1); + ORI(gpr.R(inst.dest), gpr.R(inst.src1), inst.constant); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + LI(SCRATCH1, (int32_t)inst.constant); + OR(gpr.R(inst.dest), gpr.R(inst.src1), SCRATCH1); + } + // Since our constant is normalized, oring its bits in won't hurt normalization. + if (gpr.IsNormalized32(inst.src1)) + gpr.MarkDirty(gpr.R(inst.dest), true); + break; + + case IROp::XorConst: + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + gpr.MapDirtyIn(inst.dest, inst.src1); + XORI(gpr.R(inst.dest), gpr.R(inst.src1), inst.constant); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1); + LI(SCRATCH1, (int32_t)inst.constant); + XOR(gpr.R(inst.dest), gpr.R(inst.src1), SCRATCH1); + } + break; + + case IROp::Not: + gpr.MapDirtyIn(inst.dest, inst.src1); + NOT(gpr.R(inst.dest), gpr.R(inst.src1)); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Assign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Mov: + if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + break; + + case IROp::Ext8to32: + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SEXT_B(gpr.R(inst.dest), gpr.R(inst.src1)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SLLI(gpr.R(inst.dest), gpr.R(inst.src1), 24); + SRAIW(gpr.R(inst.dest), gpr.R(inst.dest), 24); + } + break; + + case IROp::Ext16to32: + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SEXT_H(gpr.R(inst.dest), gpr.R(inst.src1)); + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SLLI(gpr.R(inst.dest), gpr.R(inst.src1), 16); + SRAIW(gpr.R(inst.dest), gpr.R(inst.dest), 16); + } + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Bits(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::ReverseBits: + CompIR_Generic(inst); + break; + + case IROp::BSwap16: + CompIR_Generic(inst); + break; + + 
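// Aside on the *Const cases above: ANDI/ORI/XORI/ADDI all take the same signed 12-bit
// immediate, so each case either encodes the constant directly or falls back to LI into
// SCRATCH1 plus the register form. The SubConst rewrite at the top of CompIR_Arith uses
// the shifted range [-2047, 2048] because the constant gets negated first. A sketch of
// the two range checks (illustrative helpers):
#include <cstdint>

static bool FitsSImm12(int64_t v) {
	return v >= -2048 && v <= 2047;          // the check repeated inline above
}

static bool FitsSImm12Negated(int32_t v) {
	return FitsSImm12(-(int64_t)v);          // true exactly for v in [-2047, 2048]
}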
case IROp::BSwap32: + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyIn(inst.dest, inst.src1); + REV8(gpr.R(inst.dest), gpr.R(inst.src1)); + if (XLEN >= 64) { + // REV8 swaps the entire register, so get the 32 highest bits. + SRAI(gpr.R(inst.dest), gpr.R(inst.dest), XLEN - 32); + gpr.MarkDirty(gpr.R(inst.dest), true); + } + } else { + CompIR_Generic(inst); + } + break; + + case IROp::Clz: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Shift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Shl: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + SLLW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + break; + + case IROp::Shr: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + SRLW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + break; + + case IROp::Sar: + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + SRAW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + break; + + case IROp::Ror: + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32); + RORW(gpr.R(inst.dest), gpr.R(inst.src1), gpr.R(inst.src2)); + } else { + CompIR_Generic(inst); + } + break; + + case IROp::ShlImm: + // Shouldn't happen, but let's be safe of any passes that modify the ops. + if (inst.src2 >= 32) { + gpr.SetImm(inst.dest, 0); + } else if (inst.src2 == 0) { + if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SLLIW(gpr.R(inst.dest), gpr.R(inst.src1), inst.src2); + } + break; + + case IROp::ShrImm: + // Shouldn't happen, but let's be safe of any passes that modify the ops. + if (inst.src2 >= 32) { + gpr.SetImm(inst.dest, 0); + } else if (inst.src2 == 0) { + if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SRLIW(gpr.R(inst.dest), gpr.R(inst.src1), inst.src2); + } + break; + + case IROp::SarImm: + // Shouldn't happen, but let's be safe of any passes that modify the ops. 
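// Aside on the ShlImm/ShrImm cases above and the SarImm case just below: shift amounts
// of 0 or >= 32 shouldn't survive the IR passes, but the handlers still pin down their
// 32-bit semantics (zero for logical shifts, sign replication for arithmetic). A plain
// model of what the emitted code computes (illustrative):
#include <cstdint>

static uint32_t ShlImmModel(uint32_t v, uint32_t s) { return s >= 32 ? 0 : v << s; }
static uint32_t ShrImmModel(uint32_t v, uint32_t s) { return s >= 32 ? 0 : v >> s; }
static int32_t  SarImmModel(int32_t v, uint32_t s)  { return v >> (s >= 32 ? 31 : s); }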
+ if (inst.src2 >= 32) { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SRAIW(gpr.R(inst.dest), gpr.R(inst.src1), 31); + } else if (inst.src2 == 0) { + if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + } else { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + SRAIW(gpr.R(inst.dest), gpr.R(inst.src1), inst.src2); + } + break; + + case IROp::RorImm: + if (inst.src2 == 0) { + if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + } else if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32); + RORIW(gpr.R(inst.dest), gpr.R(inst.src1), inst.src2 & 31); + } else { + CompIR_Generic(inst); + } + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Compare(IRInst inst) { + CONDITIONAL_DISABLE; + + RiscVReg lhs = INVALID_REG; + RiscVReg rhs = INVALID_REG; + switch (inst.op) { + case IROp::Slt: + // Not using the NORM32 flag so we don't confuse ourselves on overlap. + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + SLT(gpr.R(inst.dest), lhs, rhs); + gpr.MarkDirty(gpr.R(inst.dest), true); + break; + + case IROp::SltConst: + // Not using the NORM32 flag so we don't confuse ourselves on overlap. + gpr.MapDirtyIn(inst.dest, inst.src1); + if (inst.constant == 0) { + // Basically, getting the sign bit. Let's shift instead. + SRLIW(gpr.R(inst.dest), gpr.R(inst.src1), 31); + } else { + NormalizeSrc1(inst, &lhs, SCRATCH1, false); + + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + SLTI(gpr.R(inst.dest), lhs, (int32_t)inst.constant); + } else { + LI(SCRATCH2, (int32_t)inst.constant); + SLT(gpr.R(inst.dest), lhs, SCRATCH2); + } + } + gpr.MarkDirty(gpr.R(inst.dest), true); + break; + + case IROp::SltU: + // Not using the NORM32 flag so we don't confuse ourselves on overlap. + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + // It's still fine to sign extend, the biggest just get even bigger. + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + SLTU(gpr.R(inst.dest), lhs, rhs); + gpr.MarkDirty(gpr.R(inst.dest), true); + break; + + case IROp::SltUConst: + // Not using the NORM32 flag so we don't confuse ourselves on overlap. + gpr.MapDirtyIn(inst.dest, inst.src1); + if (inst.constant == 0) { + gpr.SetImm(inst.dest, 0); + } else { + NormalizeSrc1(inst, &lhs, SCRATCH1, false); + + // We sign extend because we're comparing against something normalized. + // It's also the most efficient to set. + if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) { + SLTIU(gpr.R(inst.dest), lhs, (int32_t)inst.constant); + } else { + LI(SCRATCH2, (int32_t)inst.constant); + SLTU(gpr.R(inst.dest), lhs, SCRATCH2); + } + + gpr.MarkDirty(gpr.R(inst.dest), true); + } + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_CondAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + RiscVReg lhs = INVALID_REG; + RiscVReg rhs = INVALID_REG; + FixupBranch fixup; + switch (inst.op) { + case IROp::MovZ: + case IROp::MovNZ: + if (inst.dest == inst.src2) + return; + + // We could have a "zero" that with wrong upper due to XOR, so we have to normalize. 
+ gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2, MapType::ALWAYS_LOAD); + NormalizeSrc1(inst, &lhs, SCRATCH1, true); + + switch (inst.op) { + case IROp::MovZ: + fixup = BNE(lhs, R_ZERO); + break; + case IROp::MovNZ: + fixup = BEQ(lhs, R_ZERO); + break; + default: + INVALIDOP; + break; + } + + MV(gpr.R(inst.dest), gpr.R(inst.src2)); + SetJumpTarget(fixup); + break; + + case IROp::Max: + if (inst.src1 != inst.src2) { + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + MAX(gpr.R(inst.dest), lhs, rhs); + // Because we had to normalize the inputs, the output is normalized. + gpr.MarkDirty(gpr.R(inst.dest), true); + } else { + CompIR_Generic(inst); + } + } else if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + break; + + case IROp::Min: + if (inst.src1 != inst.src2) { + if (cpu_info.RiscV_Zbb) { + gpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + MIN(gpr.R(inst.dest), lhs, rhs); + // Because we had to normalize the inputs, the output is normalized. + gpr.MarkDirty(gpr.R(inst.dest), true); + } else { + CompIR_Generic(inst); + } + } else if (inst.dest != inst.src1) { + gpr.MapDirtyIn(inst.dest, inst.src1); + MV(gpr.R(inst.dest), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(inst.src1)); + } + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_HiLo(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::MtLo: + gpr.MapDirtyIn(IRREG_LO, inst.src1); + MV(gpr.R(IRREG_LO), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(IRREG_LO), gpr.IsNormalized32(inst.src1)); + break; + + case IROp::MtHi: + gpr.MapDirtyIn(IRREG_HI, inst.src1); + MV(gpr.R(IRREG_HI), gpr.R(inst.src1)); + gpr.MarkDirty(gpr.R(IRREG_HI), gpr.IsNormalized32(inst.src1)); + break; + + case IROp::MfLo: + gpr.MapDirtyIn(inst.dest, IRREG_LO); + MV(gpr.R(inst.dest), gpr.R(IRREG_LO)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(IRREG_LO)); + break; + + case IROp::MfHi: + gpr.MapDirtyIn(inst.dest, IRREG_HI); + MV(gpr.R(inst.dest), gpr.R(IRREG_HI)); + gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(IRREG_HI)); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Mult(IRInst inst) { + CONDITIONAL_DISABLE; + + auto makeArgsUnsigned = [&](RiscVReg *lhs, RiscVReg *rhs) { + if (cpu_info.RiscV_Zba) { + ZEXT_W(SCRATCH1, gpr.R(inst.src1)); + ZEXT_W(SCRATCH2, gpr.R(inst.src2)); + } else { + SLLI(SCRATCH1, gpr.R(inst.src1), XLEN - 32); + SRLI(SCRATCH1, SCRATCH1, XLEN - 32); + SLLI(SCRATCH2, gpr.R(inst.src2), XLEN - 32); + SRLI(SCRATCH2, SCRATCH2, XLEN - 32); + } + *lhs = SCRATCH1; + *rhs = SCRATCH2; + }; + auto combinePrevMulResult = [&] { + // TODO: Using a single reg for HI/LO would make this less ugly. 
+ if (cpu_info.RiscV_Zba) { + ZEXT_W(gpr.R(IRREG_LO), gpr.R(IRREG_LO)); + } else { + SLLI(gpr.R(IRREG_LO), gpr.R(IRREG_LO), XLEN - 32); + SRLI(gpr.R(IRREG_LO), gpr.R(IRREG_LO), XLEN - 32); + } + SLLI(gpr.R(IRREG_HI), gpr.R(IRREG_HI), 32); + OR(gpr.R(IRREG_LO), gpr.R(IRREG_LO), gpr.R(IRREG_HI)); + }; + auto splitMulResult = [&] { + SRAI(gpr.R(IRREG_HI), gpr.R(IRREG_LO), 32); + gpr.MarkDirty(gpr.R(IRREG_HI), true); + }; + + RiscVReg lhs = INVALID_REG; + RiscVReg rhs = INVALID_REG; + switch (inst.op) { + case IROp::Mult: + // TODO: Maybe IR could simplify when HI is not needed or clobbered? + // TODO: HI/LO merge optimization? Have to be careful of passes that split them... + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + MUL(gpr.R(IRREG_LO), lhs, rhs); + splitMulResult(); + break; + + case IROp::MultU: + // This is an "anti-norm32" case. Let's just zero always. + // TODO: If we could know that LO was only needed, we could use MULW and be done. + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2); + makeArgsUnsigned(&lhs, &rhs); + MUL(gpr.R(IRREG_LO), lhs, rhs); + splitMulResult(); + break; + + case IROp::Madd: + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::ALWAYS_LOAD); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + MUL(SCRATCH1, lhs, rhs); + + combinePrevMulResult(); + ADD(gpr.R(IRREG_LO), gpr.R(IRREG_LO), SCRATCH1); + splitMulResult(); + break; + + case IROp::MaddU: + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::ALWAYS_LOAD); + makeArgsUnsigned(&lhs, &rhs); + MUL(SCRATCH1, lhs, rhs); + + combinePrevMulResult(); + ADD(gpr.R(IRREG_LO), gpr.R(IRREG_LO), SCRATCH1); + splitMulResult(); + break; + + case IROp::Msub: + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::ALWAYS_LOAD); + NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true); + MUL(SCRATCH1, lhs, rhs); + + combinePrevMulResult(); + SUB(gpr.R(IRREG_LO), gpr.R(IRREG_LO), SCRATCH1); + splitMulResult(); + break; + + case IROp::MsubU: + gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::ALWAYS_LOAD); + makeArgsUnsigned(&lhs, &rhs); + MUL(SCRATCH1, lhs, rhs); + + combinePrevMulResult(); + SUB(gpr.R(IRREG_LO), gpr.R(IRREG_LO), SCRATCH1); + splitMulResult(); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Div(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Div: + case IROp::DivU: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVCompBranch.cpp b/Core/MIPS/RiscV/RiscVCompBranch.cpp new file mode 100644 index 0000000000..c477a5fcae --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompBranch.cpp @@ -0,0 +1,146 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
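// Aside, referring back to CompIR_Mult in RiscVCompALU.cpp above: since both operands are
// kept sign-extended, a single 64-bit MUL yields the whole MIPS HI:LO product in the LO
// register; splitMulResult() then recovers HI with an arithmetic shift by 32, and
// combinePrevMulResult() rebuilds the 64-bit accumulator for MADD/MSUB. A plain-integer
// model of the two helpers (illustrative):
#include <cstdint>

static void SplitProduct(int64_t product, uint32_t &hi, uint32_t &lo) {
	lo = (uint32_t)product;
	hi = (uint32_t)(product >> 32);                  // like SRAI by 32
}

static int64_t CombineHiLo(uint32_t hi, uint32_t lo) {
	return (int64_t)(((uint64_t)hi << 32) | lo);     // zero-extend LO, shift HI up, OR
}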
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for exits. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::CompIR_Exit(IRInst inst) { + CONDITIONAL_DISABLE; + + RiscVReg exitReg = INVALID_REG; + switch (inst.op) { + case IROp::ExitToConst: + FlushAll(); + LI(SCRATCH1, inst.constant); + QuickJ(R_RA, dispatcherPCInSCRATCH1_); + break; + + case IROp::ExitToReg: + exitReg = gpr.MapReg(inst.src1); + FlushAll(); + // TODO: If ever we don't read this back in dispatcherPCInSCRATCH1_, we should zero upper. + MV(SCRATCH1, exitReg); + QuickJ(R_RA, dispatcherPCInSCRATCH1_); + break; + + case IROp::ExitToPC: + FlushAll(); + QuickJ(R_RA, dispatcher_); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_ExitIf(IRInst inst) { + CONDITIONAL_DISABLE; + + RiscVReg lhs = INVALID_REG; + RiscVReg rhs = INVALID_REG; + FixupBranch fixup; + switch (inst.op) { + case IROp::ExitToConstIfEq: + case IROp::ExitToConstIfNeq: + gpr.MapInIn(inst.src1, inst.src2); + // We can't use SCRATCH1, which is destroyed by FlushAll()... but cheat and use R_RA. + NormalizeSrc12(inst, &lhs, &rhs, R_RA, SCRATCH2, true); + FlushAll(); + + switch (inst.op) { + case IROp::ExitToConstIfEq: + fixup = BNE(lhs, rhs); + break; + + case IROp::ExitToConstIfNeq: + fixup = BEQ(lhs, rhs); + break; + + default: + INVALIDOP; + break; + } + + LI(SCRATCH1, inst.constant); + QuickJ(R_RA, dispatcherPCInSCRATCH1_); + SetJumpTarget(fixup); + break; + + case IROp::ExitToConstIfGtZ: + case IROp::ExitToConstIfGeZ: + case IROp::ExitToConstIfLtZ: + case IROp::ExitToConstIfLeZ: + gpr.MapReg(inst.src1); + NormalizeSrc1(inst, &lhs, SCRATCH2, true); + FlushAll(); + + switch (inst.op) { + case IROp::ExitToConstIfGtZ: + fixup = BGE(R_ZERO, lhs); + break; + + case IROp::ExitToConstIfGeZ: + fixup = BLT(lhs, R_ZERO); + break; + + case IROp::ExitToConstIfLtZ: + fixup = BGE(lhs, R_ZERO); + break; + + case IROp::ExitToConstIfLeZ: + fixup = BLT(R_ZERO, lhs); + break; + + default: + INVALIDOP; + break; + } + + LI(SCRATCH1, inst.constant); + QuickJ(R_RA, dispatcherPCInSCRATCH1_); + SetJumpTarget(fixup); + break; + + case IROp::ExitToConstIfFpTrue: + case IROp::ExitToConstIfFpFalse: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVCompFPU.cpp b/Core/MIPS/RiscV/RiscVCompFPU.cpp new file mode 100644 index 0000000000..ef8e6a8db8 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompFPU.cpp @@ -0,0 +1,186 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
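// Aside on CompIR_ExitIf in RiscVCompBranch.cpp above: each exit emits the *inverted*
// condition as a short forward branch over the exit stub, so the common fall-through
// path stays inside the block. Roughly the emitted shape for ExitToConstIfEq (a sketch,
// not literal emitter output):
//
//     bne   lhs, rhs, skip          // not-equal means "don't exit"
//     li    SCRATCH1, exit_pc
//     j     dispatcherPCInSCRATCH1_
//   skip:
//     ...rest of the block...
//
// The zero-compare variants follow the same rule, e.g. ExitToConstIfGtZ emits
// BGE(R_ZERO, lhs), which skips the exit exactly when lhs <= 0.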
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for floating point related instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::CompIR_FArith(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FAdd: + case IROp::FSub: + case IROp::FMul: + case IROp::FDiv: + case IROp::FSqrt: + case IROp::FNeg: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FCondAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FMin: + case IROp::FMax: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FMov: + case IROp::FAbs: + case IROp::FSign: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FRound(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FRound: + case IROp::FTrunc: + case IROp::FCeil: + case IROp::FFloor: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FCvt(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FCvtWS: + case IROp::FCvtSW: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FSat(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FSat0_1: + case IROp::FSatMinus1_1: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FCompare(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::ZeroFpCond: + case IROp::FCmp: + case IROp::FCmovVfpuCC: + case IROp::FCmpVfpuBit: + case IROp::FCmpVfpuAggregate: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_RoundingMode(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::RestoreRoundingMode: + case IROp::ApplyRoundingMode: + case IROp::UpdateRoundingMode: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FSpecial(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FSin: + case IROp::FCos: + case IROp::FRSqrt: + case IROp::FRecip: + case IROp::FAsin: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git 
a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp new file mode 100644 index 0000000000..fd0ca6392d --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp @@ -0,0 +1,252 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for load/store instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::SetScratch1ToSrc1Address(IRReg src1) { + gpr.MapReg(src1); +#ifdef MASKED_PSP_MEMORY + SLLIW(SCRATCH1, gpr.R(src1), 2); + SRLIW(SCRATCH1, SCRATCH1, 2); + ADD(SCRATCH1, SCRATCH1, MEMBASEREG); +#else + // Clear the top bits to be safe. + if (cpu_info.RiscV_Zba) { + ADD_UW(SCRATCH1, gpr.R(src1), MEMBASEREG); + } else { + _assert_(XLEN == 64); + SLLI(SCRATCH1, gpr.R(src1), 32); + SRLI(SCRATCH1, SCRATCH1, 32); + ADD(SCRATCH1, SCRATCH1, MEMBASEREG); + } +#endif +} + +int32_t RiscVJit::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant) { + if (constant < -2048 || constant > 2047) { + LI(SCRATCH2, constant); + ADD(SCRATCH1, *reg, SCRATCH2); + *reg = SCRATCH1; + return 0; + } + return constant; +} + +void RiscVJit::CompIR_Load(IRInst inst) { + CONDITIONAL_DISABLE; + + gpr.SpillLock(inst.dest, inst.src1); + RiscVReg addrReg = INVALID_REG; + if (inst.src1 == MIPS_REG_ZERO) { + // This will get changed by AdjustForAddressOffset. + addrReg = MEMBASEREG; +#ifdef MASKED_PSP_MEMORY + inst.constant &= Memory::MEMVIEW32_MASK; +#endif + } else if (jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) { + addrReg = gpr.MapRegAsPointer(inst.src1); + } else { + SetScratch1ToSrc1Address(inst.src1); + addrReg = SCRATCH1; + } + // If they're the same, MapReg may subtract MEMBASEREG, so just mark dirty. + if (inst.dest == inst.src1) + gpr.MarkDirty(gpr.R(inst.dest), true); + else + gpr.MapReg(inst.dest, MIPSMap::NOINIT | MIPSMap::MARK_NORM32); + gpr.ReleaseSpillLock(inst.dest, inst.src1); + + s32 imm = AdjustForAddressOffset(&addrReg, inst.constant); + + // TODO: Safe memory? Or enough to have crash handler + validate? 
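// Aside on SetScratch1ToSrc1Address() and AdjustForAddressOffset() above: the guest's
// 32-bit PSP address is zero-extended (or masked, in MASKED_PSP_MEMORY builds) and added
// to the host base pointer, and any IR offset outside the signed 12-bit load/store range
// is folded into the address first. A plain-C++ model (illustrative, not the emitted
// register sequence):
#include <cstdint>

static uint8_t *GuestToHost(uint8_t *membase, uint64_t guestReg, bool maskedMemory) {
	uint32_t addr = (uint32_t)guestReg;    // drop stale upper bits, like ADD_UW / SLLI+SRLI
	if (maskedMemory)
		addr &= 0x3FFFFFFF;                // the SLLIW/SRLIW-by-2 pair above
	return membase + addr;
}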
+ + switch (inst.op) { + case IROp::Load8: + LBU(gpr.R(inst.dest), addrReg, imm); + break; + + case IROp::Load8Ext: + LB(gpr.R(inst.dest), addrReg, imm); + break; + + case IROp::Load16: + LHU(gpr.R(inst.dest), addrReg, imm); + break; + + case IROp::Load16Ext: + LH(gpr.R(inst.dest), addrReg, imm); + break; + + case IROp::Load32: + LW(gpr.R(inst.dest), addrReg, imm); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_LoadShift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Load32Left: + case IROp::Load32Right: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FLoad(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::LoadFloat: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecLoad(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::LoadVec4: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Store(IRInst inst) { + CONDITIONAL_DISABLE; + + gpr.SpillLock(inst.src3, inst.src1); + RiscVReg addrReg = INVALID_REG; + if (inst.src1 == MIPS_REG_ZERO) { + // This will get changed by AdjustForAddressOffset. + addrReg = MEMBASEREG; +#ifdef MASKED_PSP_MEMORY + inst.constant &= Memory::MEMVIEW32_MASK; +#endif + } else if ((jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) && inst.src3 != inst.src1) { + addrReg = gpr.MapRegAsPointer(inst.src1); + } else { + SetScratch1ToSrc1Address(inst.src1); + addrReg = SCRATCH1; + } + RiscVReg valueReg = gpr.TryMapTempImm(inst.src3); + if (valueReg == INVALID_REG) + valueReg = gpr.MapReg(inst.src3); + gpr.ReleaseSpillLock(inst.src3, inst.src1); + + s32 imm = AdjustForAddressOffset(&addrReg, inst.constant); + + // TODO: Safe memory? Or enough to have crash handler + validate? + + switch (inst.op) { + case IROp::Store8: + SB(valueReg, addrReg, imm); + break; + + case IROp::Store16: + SH(valueReg, addrReg, imm); + break; + + case IROp::Store32: + SW(valueReg, addrReg, imm); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_StoreShift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Store32Left: + case IROp::Store32Right: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_FStore(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::StoreFloat: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecStore(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::StoreVec4: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVCompSystem.cpp b/Core/MIPS/RiscV/RiscVCompSystem.cpp new file mode 100644 index 0000000000..63eb555d31 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompSystem.cpp @@ -0,0 +1,145 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for basic PC/downcount accounting, syscalls, debug funcs, etc. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::CompIR_Basic(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::SetConst: + // Sign extend all constants. We get 0xFFFFFFFF sometimes, and it's more work to truncate. + // The register only holds 32 bits in the end anyway. + gpr.SetImm(inst.dest, (int32_t)inst.constant); + break; + + case IROp::SetConstF: + CompIR_Generic(inst); + break; + + case IROp::Downcount: + if (inst.constant <= 2048) { + ADDI(DOWNCOUNTREG, DOWNCOUNTREG, -(s32)inst.constant); + } else { + LI(SCRATCH1, inst.constant, SCRATCH2); + SUB(DOWNCOUNTREG, DOWNCOUNTREG, SCRATCH1); + } + break; + + case IROp::SetPC: + gpr.MapIn(inst.src1); + MovToPC(gpr.R(inst.src1)); + break; + + case IROp::SetPCConst: + LI(SCRATCH1, inst.constant, SCRATCH2); + MovToPC(SCRATCH1); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Transfer(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::SetCtrlVFPU: + case IROp::SetCtrlVFPUReg: + case IROp::SetCtrlVFPUFReg: + case IROp::FpCondToReg: + case IROp::VfpuCtrlToReg: + case IROp::FMovFromGPR: + case IROp::FMovToGPR: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_System(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Interpret: + case IROp::Syscall: + case IROp::CallReplacement: + case IROp::Break: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_Breakpoint(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Breakpoint: + case IROp::MemoryCheck: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_ValidateAddress(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::ValidateAddress8: + case IROp::ValidateAddress16: + case IROp::ValidateAddress32: + case IROp::ValidateAddress128: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVCompVec.cpp b/Core/MIPS/RiscV/RiscVCompVec.cpp new file mode 100644 index 0000000000..ac54192dd9 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVCompVec.cpp @@ -0,0 +1,123 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +// This file contains compilation for vector instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +void RiscVJit::CompIR_VecAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Init: + case IROp::Vec4Shuffle: + case IROp::Vec4Mov: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecArith(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Add: + case IROp::Vec4Sub: + case IROp::Vec4Mul: + case IROp::Vec4Div: + case IROp::Vec4Scale: + case IROp::Vec4Neg: + case IROp::Vec4Abs: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecHoriz(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Dot: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecPack(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec2Unpack16To31: + case IROp::Vec2Unpack16To32: + case IROp::Vec4Unpack8To32: + case IROp::Vec4DuplicateUpperBitsAndShift1: + case IROp::Vec4Pack31To8: + case IROp::Vec4Pack32To8: + case IROp::Vec2Pack31To16: + case IROp::Vec2Pack32To16: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void RiscVJit::CompIR_VecClamp(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4ClampToZero: + case IROp::Vec2ClampToZero: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp new file mode 100644 index 0000000000..980e5b6347 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVJit.cpp @@ -0,0 +1,557 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
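// As the file comments in each RiscVComp*.cpp note, every CompIR_* body opens with
// CONDITIONAL_DISABLE and known-broken handlers use DISABLE; when active, both expand to
// the CompIR_Generic fallback, which runs the op through the IR interpreter. Narrowing a
// regression down to one file is therefore just a matter of switching to the commented-out
// definition shown above:
//
//   #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
//
// while a single misbehaving op can be sidelined by placing DISABLE at the top of its
// handler until a native version works.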
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/StringUtils.h" +#include "Core/MemMap.h" +#include "Core/MIPS/RiscV/RiscVJit.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" +#include "Common/Profiler/Profiler.h" + +namespace MIPSComp { + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo) { + // Automatically disable incompatible options. + if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) { + jo.enablePointerify = false; + } + + AllocCodeSpace(1024 * 1024 * 16); + SetAutoCompress(true); + + // TODO: Consider replacing block num method form IRJit - this is 2MB. + blockStartAddrs_ = new const u8 *[MAX_ALLOWED_JIT_BLOCKS]; + memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS); + + gpr.Init(this); + // TODO: fpr + + GenerateFixedCode(jo); +} + +RiscVJit::~RiscVJit() { + delete [] blockStartAddrs_; +} + +void RiscVJit::RunLoopUntil(u64 globalticks) { + PROFILE_THIS_SCOPE("jit"); + ((void (*)())enterDispatcher_)(); +} + +bool RiscVJit::CompileBlock(u32 em_address, std::vector &instructions, u32 &mipsBytes, bool preload) { + // Check that we're not full (we allow less blocks than IR itself.) + if (blocks_.GetNumBlocks() >= MAX_ALLOWED_JIT_BLOCKS - 1) + return false; + + if (!IRJit::CompileBlock(em_address, instructions, mipsBytes, preload)) + return false; + + // TODO: Block linking, checked entries and such. + + int block_num; + if (preload) { + block_num = blocks_.GetBlockNumberFromStartAddress(em_address); + } else { + u32 first_inst = Memory::ReadUnchecked_U32(em_address); + _assert_msg_(MIPS_IS_RUNBLOCK(first_inst), "Should've written an emuhack"); + + block_num = first_inst & MIPS_EMUHACK_VALUE_MASK; + } + + _assert_msg_(block_num >= 0 && block_num < MAX_ALLOWED_JIT_BLOCKS, "Bad block num"); + _assert_msg_(blockStartAddrs_[block_num] == nullptr, "Block %d reused before clear", block_num); + blockStartAddrs_[block_num] = GetCodePointer(); + + gpr.Start(); + // TODO: fpr. + + for (const IRInst &inst : instructions) { + CompileIRInst(inst); + + if (jo.Disabled(JitDisable::REGALLOC_GPR)) { + gpr.FlushAll(); + } + // TODO + if (jo.Disabled(JitDisable::REGALLOC_FPR)) { + //fpr.FlushAll(); + } + + // Safety check, in case we get a bunch of really large jit ops without a lot of branching. + if (GetSpaceLeft() < 0x800) { + return false; + } + } + + // Note: a properly constructed block should never get here. + // TODO: Need to do more than just this? Call a func to set an exception? 
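// For scale: the "2MB" in the constructor's TODO is simply MAX_ALLOWED_JIT_BLOCKS
// (262144) slots of 8-byte host pointers. Recovering a block's native entry point from
// the emuhack left in PSP RAM mirrors the lookup above (illustrative sketch only):
//
//   u32 op = Memory::ReadUnchecked_U32(em_address);
//   if (MIPS_IS_RUNBLOCK(op)) {
//       int block_num = op & MIPS_EMUHACK_VALUE_MASK;
//       const u8 *entry = blockStartAddrs_[block_num];  // nullptr after ClearCache().
//   }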
+ QuickJ(R_RA, crashHandler_); + + FlushIcache(); + + return true; +} + +static u32 DoIRInst(uint64_t value) { + IRInst inst; + memcpy(&inst, &value, sizeof(inst)); + + return IRInterpret(currentMIPS, &inst, 1); +} + +void RiscVJit::CompileIRInst(IRInst inst) { + switch (inst.op) { + case IROp::Nop: + break; + + case IROp::SetConst: + case IROp::SetConstF: + case IROp::Downcount: + case IROp::SetPC: + case IROp::SetPCConst: + CompIR_Basic(inst); + break; + + case IROp::Add: + case IROp::Sub: + case IROp::AddConst: + case IROp::SubConst: + case IROp::Neg: + CompIR_Arith(inst); + break; + + case IROp::And: + case IROp::Or: + case IROp::Xor: + case IROp::AndConst: + case IROp::OrConst: + case IROp::XorConst: + case IROp::Not: + CompIR_Logic(inst); + break; + + case IROp::Mov: + case IROp::Ext8to32: + case IROp::Ext16to32: + CompIR_Assign(inst); + break; + + case IROp::ReverseBits: + case IROp::BSwap16: + case IROp::BSwap32: + case IROp::Clz: + CompIR_Bits(inst); + break; + + case IROp::Shl: + case IROp::Shr: + case IROp::Sar: + case IROp::Ror: + case IROp::ShlImm: + case IROp::ShrImm: + case IROp::SarImm: + case IROp::RorImm: + CompIR_Shift(inst); + break; + + case IROp::Slt: + case IROp::SltConst: + case IROp::SltU: + case IROp::SltUConst: + CompIR_Compare(inst); + break; + + case IROp::MovZ: + case IROp::MovNZ: + case IROp::Max: + case IROp::Min: + CompIR_CondAssign(inst); + break; + + case IROp::MtLo: + case IROp::MtHi: + case IROp::MfLo: + case IROp::MfHi: + CompIR_HiLo(inst); + break; + + case IROp::Mult: + case IROp::MultU: + case IROp::Madd: + case IROp::MaddU: + case IROp::Msub: + case IROp::MsubU: + CompIR_Mult(inst); + break; + + case IROp::Div: + case IROp::DivU: + CompIR_Div(inst); + break; + + case IROp::Load8: + case IROp::Load8Ext: + case IROp::Load16: + case IROp::Load16Ext: + case IROp::Load32: + CompIR_Load(inst); + break; + + case IROp::Load32Left: + case IROp::Load32Right: + CompIR_LoadShift(inst); + break; + + case IROp::LoadFloat: + CompIR_FLoad(inst); + break; + + case IROp::LoadVec4: + CompIR_VecLoad(inst); + break; + + case IROp::Store8: + case IROp::Store16: + case IROp::Store32: + CompIR_Store(inst); + break; + + case IROp::Store32Left: + case IROp::Store32Right: + CompIR_StoreShift(inst); + break; + + case IROp::StoreFloat: + CompIR_FStore(inst); + break; + + case IROp::StoreVec4: + CompIR_VecStore(inst); + break; + + case IROp::FAdd: + case IROp::FSub: + case IROp::FMul: + case IROp::FDiv: + case IROp::FSqrt: + case IROp::FNeg: + CompIR_FArith(inst); + break; + + case IROp::FMin: + case IROp::FMax: + CompIR_FCondAssign(inst); + break; + + case IROp::FMov: + case IROp::FAbs: + case IROp::FSign: + CompIR_FAssign(inst); + break; + + case IROp::FRound: + case IROp::FTrunc: + case IROp::FCeil: + case IROp::FFloor: + CompIR_FRound(inst); + break; + + case IROp::FCvtWS: + case IROp::FCvtSW: + CompIR_FCvt(inst); + break; + + case IROp::FSat0_1: + case IROp::FSatMinus1_1: + CompIR_FSat(inst); + break; + + case IROp::ZeroFpCond: + case IROp::FCmp: + case IROp::FCmovVfpuCC: + case IROp::FCmpVfpuBit: + case IROp::FCmpVfpuAggregate: + CompIR_FCompare(inst); + break; + + case IROp::RestoreRoundingMode: + case IROp::ApplyRoundingMode: + case IROp::UpdateRoundingMode: + CompIR_RoundingMode(inst); + break; + + case IROp::SetCtrlVFPU: + case IROp::SetCtrlVFPUReg: + case IROp::SetCtrlVFPUFReg: + case IROp::FpCondToReg: + case IROp::VfpuCtrlToReg: + case IROp::FMovFromGPR: + case IROp::FMovToGPR: + CompIR_Transfer(inst); + break; + + case IROp::Vec4Init: + case IROp::Vec4Shuffle: + 
case IROp::Vec4Mov: + CompIR_VecAssign(inst); + break; + + case IROp::Vec4Add: + case IROp::Vec4Sub: + case IROp::Vec4Mul: + case IROp::Vec4Div: + case IROp::Vec4Scale: + case IROp::Vec4Neg: + case IROp::Vec4Abs: + CompIR_VecArith(inst); + break; + + case IROp::Vec4Dot: + CompIR_VecHoriz(inst); + break; + + case IROp::Vec2Unpack16To31: + case IROp::Vec2Unpack16To32: + case IROp::Vec4Unpack8To32: + case IROp::Vec4DuplicateUpperBitsAndShift1: + case IROp::Vec4Pack31To8: + case IROp::Vec4Pack32To8: + case IROp::Vec2Pack31To16: + case IROp::Vec2Pack32To16: + CompIR_VecPack(inst); + break; + + case IROp::Vec4ClampToZero: + case IROp::Vec2ClampToZero: + CompIR_VecClamp(inst); + break; + + case IROp::FSin: + case IROp::FCos: + case IROp::FRSqrt: + case IROp::FRecip: + case IROp::FAsin: + CompIR_FSpecial(inst); + break; + + case IROp::Interpret: + case IROp::Syscall: + case IROp::CallReplacement: + case IROp::Break: + CompIR_System(inst); + break; + + case IROp::Breakpoint: + case IROp::MemoryCheck: + CompIR_Breakpoint(inst); + break; + + case IROp::ValidateAddress8: + case IROp::ValidateAddress16: + case IROp::ValidateAddress32: + case IROp::ValidateAddress128: + CompIR_ValidateAddress(inst); + break; + + case IROp::ExitToConst: + case IROp::ExitToReg: + case IROp::ExitToPC: + CompIR_Exit(inst); + break; + + case IROp::ExitToConstIfEq: + case IROp::ExitToConstIfNeq: + case IROp::ExitToConstIfGtZ: + case IROp::ExitToConstIfGeZ: + case IROp::ExitToConstIfLtZ: + case IROp::ExitToConstIfLeZ: + case IROp::ExitToConstIfFpTrue: + case IROp::ExitToConstIfFpFalse: + CompIR_ExitIf(inst); + break; + + default: + _assert_msg_(false, "Unexpected IR op %d", (int)inst.op); + CompIR_Generic(inst); + break; + } +} + +void RiscVJit::CompIR_Generic(IRInst inst) { + // For now, we're gonna do it the slow and ugly way. + // Maybe there's a smarter way to fallback? + uint64_t value; + memcpy(&value, &inst, sizeof(inst)); + + FlushAll(); + LI(X10, value, SCRATCH2); + SaveStaticRegisters(); + QuickCallFunction(&DoIRInst); + LoadStaticRegisters(); + // Result in X10 aka SCRATCH1. + _assert_(X10 == SCRATCH1); + if (BInRange(dispatcherPCInSCRATCH1_)) { + BNE(X10, R_ZERO, dispatcherPCInSCRATCH1_); + } else { + FixupBranch skip = BEQ(X10, R_ZERO); + QuickJ(R_RA, dispatcherPCInSCRATCH1_); + SetJumpTarget(skip); + } +} + +void RiscVJit::FlushAll() { + gpr.FlushAll(); + // TODO: fpr. +} + +bool RiscVJit::DescribeCodePtr(const u8 *ptr, std::string &name) { + // Used in disassembly viewer. + if (ptr == dispatcher_) { + name = "dispatcher"; + } else if (ptr == dispatcherPCInSCRATCH1_) { + name = "dispatcher (PC in SCRATCH1)"; + } else if (ptr == dispatcherNoCheck_) { + name = "dispatcherNoCheck"; + } else if (ptr == saveStaticRegisters_) { + name = "saveStaticRegisters"; + } else if (ptr == loadStaticRegisters_) { + name = "loadStaticRegisters"; + } else if (ptr == enterDispatcher_) { + name = "enterDispatcher"; + } else if (!IsInSpace(ptr)) { + return false; + } else { + uintptr_t uptr = (uintptr_t)ptr; + int block_num = -1; + for (int i = 0; i < MAX_ALLOWED_JIT_BLOCKS; ++i) { + uintptr_t blockptr = (uintptr_t)blockStartAddrs_[i]; + // Out of allocated blocks. 
+ if (uptr == 0) + break; + + if (uptr >= blockptr) + block_num = i; + if (uptr < blockptr) + break; + } + + if (block_num == -1) { + name = "(unknown or deleted block)"; + return true; + } + + const IRBlock *block = blocks_.GetBlock(block_num); + if (block) { + u32 start = 0, size = 0; + block->GetRange(start, size); + name = StringFromFormat("(block %d at %08x)", block_num, start); + return true; + } + return false; + } + return true; +} + +bool RiscVJit::CodeInRange(const u8 *ptr) const { + return IsInSpace(ptr); +} + +bool RiscVJit::IsAtDispatchFetch(const u8 *ptr) const { + return ptr == dispatcherFetch_; +} + +const u8 *RiscVJit::GetDispatcher() const { + return dispatcher_; +} + +const u8 *RiscVJit::GetCrashHandler() const { + return crashHandler_; +} + +void RiscVJit::ClearCache() { + IRJit::ClearCache(); + + ClearCodeSpace(jitStartOffset_); + FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_); + + memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS); +} + +void RiscVJit::UpdateFCR31() { + IRJit::UpdateFCR31(); + + // TODO: Handle rounding modes? +} + +void RiscVJit::RestoreRoundingMode(bool force) { + // TODO: Could maybe skip if not hasSetRounding? But that's on IRFrontend... + FSRMI(Round::NEAREST_EVEN); +} + +void RiscVJit::ApplyRoundingMode(bool force) { + // TODO: Also could maybe sometimes skip? + //QuickCallFunction(applyRoundingMode_); +} + +void RiscVJit::MovFromPC(RiscVReg r) { + LWU(r, CTXREG, offsetof(MIPSState, pc)); +} + +void RiscVJit::MovToPC(RiscVReg r) { + SW(r, CTXREG, offsetof(MIPSState, pc)); +} + +void RiscVJit::SaveStaticRegisters() { + if (jo.useStaticAlloc) { + QuickCallFunction(saveStaticRegisters_); + } else { + // Inline the single operation + SW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void RiscVJit::LoadStaticRegisters() { + if (jo.useStaticAlloc) { + QuickCallFunction(loadStaticRegisters_); + } else { + LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void RiscVJit::NormalizeSrc1(IRInst inst, RiscVReg *reg, RiscVReg tempReg, bool allowOverlap) { + *reg = NormalizeR(inst.src1, allowOverlap ? 0 : inst.dest, tempReg); +} + +void RiscVJit::NormalizeSrc12(IRInst inst, RiscVReg *lhs, RiscVReg *rhs, RiscVReg lhsTempReg, RiscVReg rhsTempReg, bool allowOverlap) { + *lhs = NormalizeR(inst.src1, allowOverlap ? 0 : inst.dest, lhsTempReg); + *rhs = NormalizeR(inst.src2, allowOverlap ? 0 : inst.dest, rhsTempReg); +} + +RiscVReg RiscVJit::NormalizeR(IRRegIndex rs, IRRegIndex rd, RiscVReg tempReg) { + // For proper compare, we must sign extend so they both match or don't match. + // But don't change pointers, in case one is SP (happens in LittleBigPlanet.) + if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) { + return R_ZERO; + } else if (gpr.IsMappedAsPointer(rs) || rs == rd) { + return gpr.Normalize32(rs, tempReg); + } else { + return gpr.Normalize32(rs); + } +} + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h new file mode 100644 index 0000000000..5f32aee999 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVJit.h @@ -0,0 +1,138 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
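// CompIR_Generic in RiscVJit.cpp works because an IRInst (op, dest/src3, src1, src2 plus
// the u32 constant) packs into at most 64 bits, so a whole instruction can travel to the
// interpreter fallback in one argument register (X10). The round trip in isolation
// (illustrative helper, not part of the patch; DoIRInst above is the receiving half):
static u32 InterpretOneInstSketch(IRInst inst) {
	uint64_t packed = 0;
	static_assert(sizeof(inst) <= sizeof(uint64_t), "IRInst must fit in a single GPR");
	memcpy(&packed, &inst, sizeof(inst));          // What the jit LIs into X10...
	IRInst unpacked;
	memcpy(&unpacked, &packed, sizeof(unpacked));  // ...and what the callee unpacks.
	// A nonzero result is a PC; the emitted code branches to the dispatcher with it in SCRATCH1.
	return IRInterpret(currentMIPS, &unpacked, 1);
}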
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include +#include +#include "Common/RiscVEmitter.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" + +namespace MIPSComp { + +class RiscVJit : public RiscVGen::RiscVCodeBlock, public IRJit { +public: + RiscVJit(MIPSState *mipsState); + ~RiscVJit(); + + void RunLoopUntil(u64 globalticks) override; + + bool DescribeCodePtr(const u8 *ptr, std::string &name) override; + bool CodeInRange(const u8 *ptr) const override; + bool IsAtDispatchFetch(const u8 *ptr) const override; + const u8 *GetDispatcher() const override; + const u8 *GetCrashHandler() const override; + + void ClearCache() override; + void UpdateFCR31() override; + + // TODO: GetBlockCacheDebugInterface, block linking? + +protected: + bool CompileBlock(u32 em_address, std::vector &instructions, u32 &mipsBytes, bool preload) override; + + void CompileIRInst(IRInst inst); + +private: + void GenerateFixedCode(const JitOptions &jo); + + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void MovFromPC(RiscVGen::RiscVReg r); + void MovToPC(RiscVGen::RiscVReg r); + + void SaveStaticRegisters(); + void LoadStaticRegisters(); + + // Note: destroys SCRATCH1. + void FlushAll(); + + void CompIR_Arith(IRInst inst); + void CompIR_Assign(IRInst inst); + void CompIR_Basic(IRInst inst); + void CompIR_Bits(IRInst inst); + void CompIR_Breakpoint(IRInst inst); + void CompIR_Compare(IRInst inst); + void CompIR_CondAssign(IRInst inst); + void CompIR_Div(IRInst inst); + void CompIR_Exit(IRInst inst); + void CompIR_ExitIf(IRInst inst); + void CompIR_FArith(IRInst inst); + void CompIR_FAssign(IRInst inst); + void CompIR_FCompare(IRInst inst); + void CompIR_FCondAssign(IRInst inst); + void CompIR_FCvt(IRInst inst); + void CompIR_FLoad(IRInst inst); + void CompIR_FRound(IRInst inst); + void CompIR_FSat(IRInst inst); + void CompIR_FSpecial(IRInst inst); + void CompIR_FStore(IRInst inst); + void CompIR_Generic(IRInst inst); + void CompIR_HiLo(IRInst inst); + void CompIR_Load(IRInst inst); + void CompIR_LoadShift(IRInst inst); + void CompIR_Logic(IRInst inst); + void CompIR_Mult(IRInst inst); + void CompIR_RoundingMode(IRInst inst); + void CompIR_Shift(IRInst inst); + void CompIR_Store(IRInst inst); + void CompIR_StoreShift(IRInst inst); + void CompIR_System(IRInst inst); + void CompIR_Transfer(IRInst inst); + void CompIR_VecArith(IRInst inst); + void CompIR_VecAssign(IRInst inst); + void CompIR_VecClamp(IRInst inst); + void CompIR_VecHoriz(IRInst inst); + void CompIR_VecLoad(IRInst inst); + void CompIR_VecPack(IRInst inst); + void CompIR_VecStore(IRInst inst); + void CompIR_ValidateAddress(IRInst inst); + + void SetScratch1ToSrc1Address(IRReg src1); + // Modifies SCRATCH regs. 
+ int32_t AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant); + void NormalizeSrc1(IRInst inst, RiscVGen::RiscVReg *reg, RiscVGen::RiscVReg tempReg, bool allowOverlap); + void NormalizeSrc12(IRInst inst, RiscVGen::RiscVReg *lhs, RiscVGen::RiscVReg *rhs, RiscVGen::RiscVReg lhsTempReg, RiscVGen::RiscVReg rhsTempReg, bool allowOverlap); + RiscVGen::RiscVReg NormalizeR(IRRegIndex rs, IRRegIndex rd, RiscVGen::RiscVReg tempReg); + + RiscVRegCache gpr; + + static constexpr int MAX_ALLOWED_JIT_BLOCKS = 262144; + + const u8 *enterDispatcher_ = nullptr; + + const u8 *outerLoop_ = nullptr; + const u8 *outerLoopPCInSCRATCH1_ = nullptr; + const u8 *dispatcherCheckCoreState_ = nullptr; + const u8 *dispatcherPCInSCRATCH1_ = nullptr; + const u8 *dispatcher_ = nullptr; + const u8 *dispatcherNoCheck_ = nullptr; + const u8 *dispatcherFetch_ = nullptr; + + const u8 *saveStaticRegisters_ = nullptr; + const u8 *loadStaticRegisters_ = nullptr; + + const u8 *crashHandler_ = nullptr; + + int jitStartOffset_ = 0; + const u8 **blockStartAddrs_ = nullptr; +}; + +} // namespace MIPSComp diff --git a/Core/MIPS/RiscV/RiscVRegCache.cpp b/Core/MIPS/RiscV/RiscVRegCache.cpp new file mode 100644 index 0000000000..6ddf909977 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVRegCache.cpp @@ -0,0 +1,1075 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/CPUDetect.h" +#include "Core/MIPS/RiscV/RiscVRegCache.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/Reporting.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +using namespace RiscVGen; +using namespace RiscVJitConstants; + +RiscVRegCache::RiscVRegCache(MIPSState *mipsState, MIPSComp::JitOptions *jo) + : mips_(mipsState), jo_(jo) { +} + +void RiscVRegCache::Init(RiscVEmitter *emitter) { + emit_ = emitter; +} + +void RiscVRegCache::Start() { + for (int i = 0; i < NUM_RVREG; i++) { + ar[i].mipsReg = IRREG_INVALID; + ar[i].isDirty = false; + ar[i].pointerified = false; + ar[i].tempLocked = false; + ar[i].normalized32 = false; + } + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].loc = MIPSLoc::MEM; + mr[i].reg = INVALID_REG; + mr[i].imm = -1; + mr[i].spillLock = false; + mr[i].isStatic = false; + } + int numStatics; + const StaticAllocation *statics = GetStaticAllocations(numStatics); + for (int i = 0; i < numStatics; i++) { + ar[statics[i].ar].mipsReg = statics[i].mr; + ar[statics[i].ar].pointerified = statics[i].pointerified && jo_->enablePointerify; + ar[statics[i].ar].normalized32 = false; + mr[statics[i].mr].loc = MIPSLoc::RVREG; + mr[statics[i].mr].reg = statics[i].ar; + mr[statics[i].mr].isStatic = true; + mr[statics[i].mr].spillLock = true; + } + + // Treat R_ZERO a bit specially, but it's basically static alloc too. 
+ ar[R_ZERO].mipsReg = MIPS_REG_ZERO; + ar[R_ZERO].normalized32 = true; + mr[MIPS_REG_ZERO].loc = MIPSLoc::RVREG_IMM; + mr[MIPS_REG_ZERO].reg = R_ZERO; + mr[MIPS_REG_ZERO].imm = 0; + mr[MIPS_REG_ZERO].isStatic = true; +} + +const RiscVReg *RiscVRegCache::GetMIPSAllocationOrder(int &count) { + // X8 and X9 are the most ideal for static alloc because they can be used with compression. + // Otherwise we stick to saved regs - might not be necessary. + static const RiscVReg allocationOrder[] = { + X7, X8, X9, X12, X13, X14, X5, X6, X15, X16, X17, X18, X19, X20, X21, X22, X23, X28, X29, X30, X31, + }; + static const RiscVReg allocationOrderStaticAlloc[] = { + X7, X12, X13, X14, X5, X6, X15, X16, X17, X21, X22, X23, X28, X29, X30, X31, + }; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocationOrderStaticAlloc); + return allocationOrderStaticAlloc; + } else { + count = ARRAY_SIZE(allocationOrder); + return allocationOrder; + } +} + +const RiscVRegCache::StaticAllocation *RiscVRegCache::GetStaticAllocations(int &count) { + static const StaticAllocation allocs[] = { + { MIPS_REG_SP, X8, true }, + { MIPS_REG_V0, X9 }, + { MIPS_REG_V1, X18 }, + { MIPS_REG_A0, X19 }, + { MIPS_REG_RA, X20 }, + }; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocs); + return allocs; + } else { + count = 0; + return nullptr; + } +} + +void RiscVRegCache::EmitLoadStaticRegisters() { + int count; + const StaticAllocation *allocs = GetStaticAllocations(count); + for (int i = 0; i < count; i++) { + int offset = GetMipsRegOffset(allocs[i].mr); + if (allocs[i].pointerified && jo_->enablePointerify) { + emit_->LWU(allocs[i].ar, CTXREG, offset); + emit_->ADD(allocs[i].ar, allocs[i].ar, MEMBASEREG); + } else { + emit_->LW(allocs[i].ar, CTXREG, offset); + } + } +} + +void RiscVRegCache::EmitSaveStaticRegisters() { + int count; + const StaticAllocation *allocs = GetStaticAllocations(count); + // This only needs to run once (by Asm) so checks don't need to be fast. + for (int i = 0; i < count; i++) { + int offset = GetMipsRegOffset(allocs[i].mr); + emit_->SW(allocs[i].ar, CTXREG, offset); + } +} + +void RiscVRegCache::FlushBeforeCall() { + // These registers are not preserved by function calls. 
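// Specifically x5-x7 (t0-t2), x10-x17 (a0-a7) and x28-x31 (t3-t6): the caller-saved GPRs
// of the standard RISC-V calling convention (besides ra). Everything this cache allocates
// statically (see GetStaticAllocations above), plus DOWNCOUNTREG/CTXREG/MEMBASEREG, sits
// in callee-saved registers and survives the call. The same set as a predicate, for
// reference (hypothetical helper):
//
//   static bool IsCallerSavedGPR(RiscVReg r) {
//       int n = (int)r - (int)X0;
//       return (n >= 5 && n <= 7) || (n >= 10 && n <= 17) || (n >= 28 && n <= 31);
//   }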
+ for (int i = 5; i <= 7; ++i) { + FlushRiscVReg(RiscVReg(X0 + i)); + } + for (int i = 10; i <= 17; ++i) { + FlushRiscVReg(RiscVReg(X0 + i)); + } + for (int i = 28; i <= 31; ++i) { + FlushRiscVReg(RiscVReg(X0 + i)); + } +} + +bool RiscVRegCache::IsInRAM(IRRegIndex reg) { + _dbg_assert_(IsValidReg(reg)); + return mr[reg].loc == MIPSLoc::MEM; +} + +bool RiscVRegCache::IsMapped(IRRegIndex mipsReg) { + _dbg_assert_(IsValidReg(mipsReg)); + return mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM; +} + +bool RiscVRegCache::IsMappedAsPointer(IRRegIndex mipsReg) { + _dbg_assert_(IsValidReg(mipsReg)); + if (mr[mipsReg].loc == MIPSLoc::RVREG) { + return ar[mr[mipsReg].reg].pointerified; + } else if (mr[mipsReg].loc == MIPSLoc::RVREG_IMM) { + if (ar[mr[mipsReg].reg].pointerified) { + ERROR_LOG(JIT, "Really shouldn't be pointerified here"); + } + } else if (mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR) { + return true; + } + return false; +} + +bool RiscVRegCache::IsMappedAsStaticPointer(IRRegIndex reg) { + if (IsMappedAsPointer(reg)) { + return mr[reg].isStatic; + } + return false; +} + +bool RiscVRegCache::IsNormalized32(IRRegIndex mipsReg) { + _dbg_assert_(IsValidReg(mipsReg)); + if (XLEN == 32) + return true; + if (mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM) { + return ar[mr[mipsReg].reg].normalized32; + } + return false; +} + +void RiscVRegCache::MarkDirty(RiscVReg reg, bool andNormalized32) { + // Can't mark X0 dirty. + _dbg_assert_(reg > X0 && reg <= X31); + ar[reg].isDirty = true; + ar[reg].normalized32 = andNormalized32; + // If reg is written to, pointerification is lost. + ar[reg].pointerified = false; + if (ar[reg].mipsReg != IRREG_INVALID) { + RegStatusMIPS &m = mr[ar[reg].mipsReg]; + if (m.loc == MIPSLoc::RVREG_AS_PTR || m.loc == MIPSLoc::RVREG_IMM) { + m.loc = MIPSLoc::RVREG; + m.imm = -1; + } + _dbg_assert_(m.loc == MIPSLoc::RVREG); + } +} + +void RiscVRegCache::MarkPtrDirty(RiscVReg reg) { + // Can't mark X0 dirty. + _dbg_assert_(reg > X0 && reg <= X31); + _dbg_assert_(!ar[reg].normalized32); + ar[reg].isDirty = true; + if (ar[reg].mipsReg != IRREG_INVALID) { + _dbg_assert_(mr[ar[reg].mipsReg].loc == MIPSLoc::RVREG_AS_PTR); + } else { + _dbg_assert_(ar[reg].pointerified); + } +} + +RiscVGen::RiscVReg RiscVRegCache::Normalize32(IRRegIndex mipsReg, RiscVGen::RiscVReg destReg) { + _dbg_assert_(IsValidReg(mipsReg)); + _dbg_assert_(destReg == INVALID_REG || (destReg > X0 && destReg <= X31)); + + RiscVReg reg = mr[mipsReg].reg; + if (XLEN == 32) + return reg; + + switch (mr[mipsReg].loc) { + case MIPSLoc::IMM: + case MIPSLoc::MEM: + _assert_msg_(false, "Cannot normalize an imm or mem"); + return INVALID_REG; + + case MIPSLoc::RVREG: + case MIPSLoc::RVREG_IMM: + if (!ar[mr[mipsReg].reg].normalized32) { + if (destReg == INVALID_REG) { + emit_->SEXT_W(mr[mipsReg].reg, mr[mipsReg].reg); + ar[mr[mipsReg].reg].normalized32 = true; + ar[mr[mipsReg].reg].pointerified = false; + } else { + emit_->SEXT_W(destReg, mr[mipsReg].reg); + } + } else if (destReg != INVALID_REG) { + emit_->SEXT_W(destReg, mr[mipsReg].reg); + } + break; + + case MIPSLoc::RVREG_AS_PTR: + _dbg_assert_(ar[mr[mipsReg].reg].normalized32 == false); + if (destReg == INVALID_REG) { + // If we can pointerify, SEXT_W will be enough. 
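// (That works because pointerify is only enabled when Memory::base has all-zero low 32
// bits - see the RiscVJit constructor - so SEXT_W both strips the base and re-extends.)
// "Normalized" throughout this cache means the 32-bit guest value is held sign-extended
// across the full 64-bit register, the form 64-bit compares and branches need for MIPS
// 32-bit semantics to come out right. The test SetRegImm uses, as a standalone sketch:
//
//   static bool IsCanonicalSignExtended32(uint64_t v) {
//       return v == (uint64_t)(int64_t)(int32_t)v;
//   }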
+ if (!jo_->enablePointerify) + emit_->SUB(mr[mipsReg].reg, mr[mipsReg].reg, MEMBASEREG); + emit_->SEXT_W(mr[mipsReg].reg, mr[mipsReg].reg); + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[mr[mipsReg].reg].normalized32 = true; + ar[mr[mipsReg].reg].pointerified = false; + } else if (!jo_->enablePointerify) { + emit_->SUB(destReg, mr[mipsReg].reg, MEMBASEREG); + emit_->SEXT_W(destReg, destReg); + } else { + emit_->SEXT_W(destReg, mr[mipsReg].reg); + } + break; + } + + return destReg == INVALID_REG ? reg : destReg; +} + +void RiscVRegCache::SetRegImm(RiscVReg reg, u64 imm) { + _dbg_assert_(reg != R_ZERO || imm == 0); + _dbg_assert_(reg >= X0 && reg <= X31); + // TODO: Could optimize this more for > 32 bit constants. + emit_->LI(reg, imm); + _dbg_assert_(!ar[reg].pointerified); + ar[reg].normalized32 = imm == (u64)(s64)(s32)imm; +} + +void RiscVRegCache::MapRegTo(RiscVReg reg, IRRegIndex mipsReg, MIPSMap mapFlags) { + _dbg_assert_(reg > X0 && reg <= X31); + _dbg_assert_(IsValidReg(mipsReg)); + _dbg_assert_(!mr[mipsReg].isStatic); + if (mr[mipsReg].isStatic) { + ERROR_LOG(JIT, "Cannot MapRegTo static register %d", mipsReg); + return; + } + ar[reg].isDirty = (mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY; + if ((mapFlags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { + if (mipsReg == MIPS_REG_ZERO) { + // If we get a request to load the zero register, at least we won't spend + // time on a memory access... + emit_->LI(reg, 0); + + // This way, if we SetImm() it, we'll keep it. + mr[mipsReg].loc = MIPSLoc::RVREG_IMM; + mr[mipsReg].imm = 0; + ar[reg].normalized32 = true; + } else { + switch (mr[mipsReg].loc) { + case MIPSLoc::MEM: + emit_->LW(reg, CTXREG, GetMipsRegOffset(mipsReg)); + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[reg].normalized32 = true; + break; + case MIPSLoc::IMM: + SetRegImm(reg, mr[mipsReg].imm); + // IMM is always dirty. + ar[reg].isDirty = true; + + // If we are mapping dirty, it means we're gonna overwrite. + // So the imm value is no longer valid. + if ((mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY) + mr[mipsReg].loc = MIPSLoc::RVREG; + else + mr[mipsReg].loc = MIPSLoc::RVREG_IMM; + break; + case MIPSLoc::RVREG_IMM: + // If it's not dirty, we can keep it. + if (ar[reg].isDirty) + mr[mipsReg].loc = MIPSLoc::RVREG; + break; + default: + _assert_msg_(mr[mipsReg].loc != MIPSLoc::RVREG_AS_PTR, "MapRegTo with a pointer?"); + mr[mipsReg].loc = MIPSLoc::RVREG; + break; + } + } + } else { + _dbg_assert_(mipsReg != MIPS_REG_ZERO); + _dbg_assert_(ar[reg].isDirty); + mr[mipsReg].loc = MIPSLoc::RVREG; + } + ar[reg].mipsReg = mipsReg; + ar[reg].pointerified = false; + if (ar[reg].isDirty) + ar[reg].normalized32 = (mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32; + mr[mipsReg].reg = reg; +} + +RiscVReg RiscVRegCache::AllocateReg() { + int allocCount; + const RiscVReg *allocOrder = GetMIPSAllocationOrder(allocCount); + +allocate: + for (int i = 0; i < allocCount; i++) { + RiscVReg reg = allocOrder[i]; + + if (ar[reg].mipsReg == IRREG_INVALID && !ar[reg].tempLocked) { + return reg; + } + } + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? + bool clobbered; + RiscVReg bestToSpill = FindBestToSpill(true, &clobbered); + if (bestToSpill == INVALID_REG) { + bestToSpill = FindBestToSpill(false, &clobbered); + } + + if (bestToSpill != INVALID_REG) { + if (clobbered) { + DiscardR(ar[bestToSpill].mipsReg); + } else { + FlushRiscVReg(bestToSpill); + } + // Now one must be free. 
+ goto allocate; + } + + // Uh oh, we have all of them spilllocked.... + ERROR_LOG_REPORT(JIT, "Out of spillable registers near PC %08x", mips_->pc); + _assert_(bestToSpill != INVALID_REG); + return INVALID_REG; +} + +RiscVReg RiscVRegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) { + int allocCount; + const RiscVReg *allocOrder = GetMIPSAllocationOrder(allocCount); + + static const int UNUSED_LOOKAHEAD_OPS = 30; + + *clobbered = false; + for (int i = 0; i < allocCount; i++) { + RiscVReg reg = allocOrder[i]; + if (ar[reg].mipsReg != IRREG_INVALID && mr[ar[reg].mipsReg].spillLock) + continue; + if (ar[reg].tempLocked) + continue; + + // As it's in alloc-order, we know it's not static so we don't need to check for that. + + // TODO: Look for clobbering in the IRInst array with index? + + // Not awesome. A used reg. Let's try to avoid spilling. + // TODO: Actually check if we'd be spilling. + if (unusedOnly) { + continue; + } + + return reg; + } + + return INVALID_REG; +} + +RiscVReg RiscVRegCache::TryMapTempImm(IRRegIndex r) { + _dbg_assert_(IsValidReg(r)); + // If already mapped, no need for a temporary. + if (IsMapped(r)) { + return R(r); + } + + if (mr[r].loc == MIPSLoc::IMM) { + if (mr[r].imm == 0) { + return R_ZERO; + } + + // Try our luck - check for an exact match in another rvreg. + for (int i = 0; i < NUM_MIPSREG; ++i) { + if (mr[i].loc == MIPSLoc::RVREG_IMM && mr[i].imm == mr[r].imm) { + // Awesome, let's just use this reg. + return mr[i].reg; + } + } + } + + return INVALID_REG; +} + +RiscVReg RiscVRegCache::GetAndLockTempR() { + RiscVReg reg = AllocateReg(); + if (reg != INVALID_REG) { + ar[reg].tempLocked = true; + } + return reg; +} + +RiscVReg RiscVRegCache::MapReg(IRRegIndex mipsReg, MIPSMap mapFlags) { + _dbg_assert_(IsValidReg(mipsReg)); + + // TODO: Optimization to force HI/LO to be combined? + + if (mipsReg == IRREG_INVALID) { + ERROR_LOG(JIT, "Cannot map invalid register"); + return INVALID_REG; + } + + RiscVReg riscvReg = mr[mipsReg].reg; + + if (mr[mipsReg].isStatic) { + _dbg_assert_(riscvReg != INVALID_REG); + if (riscvReg == INVALID_REG) { + ERROR_LOG(JIT, "MapReg on statically mapped reg %d failed - riscvReg got lost", mipsReg); + } + if (mr[mipsReg].loc == MIPSLoc::IMM) { + // Back into the register, with or without the imm value. + // If noinit, the MAP_DIRTY check below will take care of the rest. + if ((mapFlags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { + // This may set normalized32 to true. + SetRegImm(riscvReg, mr[mipsReg].imm); + mr[mipsReg].loc = MIPSLoc::RVREG_IMM; + ar[riscvReg].pointerified = false; + } + if ((mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32) + ar[riscvReg].normalized32 = true; + } else if (mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR) { + // Was mapped as pointer, now we want it mapped as a value, presumably to + // add or subtract stuff to it. + if ((mapFlags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { +#ifdef MASKED_PSP_MEMORY + _dbg_assert_(!ar[riscvReg].isDirty && (mapFlags & MIPSMap::DIRTY) != MIPSMap::DIRTY); +#endif + emit_->SUB(riscvReg, riscvReg, MEMBASEREG); + } + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[riscvReg].normalized32 = false; + } + // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_RVREG_IMM and return + // true for IsImm and calculate crazily wrong things). 
/unknown + if ((mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY) { + // As we are dirty, can't keep RVREG_IMM, we will quickly drift out of sync + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[riscvReg].pointerified = false; + ar[riscvReg].isDirty = true; + ar[riscvReg].normalized32 = (mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32; + } else if ((mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32) { + ar[riscvReg].normalized32 = true; + } + return mr[mipsReg].reg; + } + + // Let's see if it's already mapped. If so we just need to update the dirty flag. + // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM) { + _dbg_assert_(riscvReg != INVALID_REG && ar[riscvReg].mipsReg == mipsReg); + if (ar[riscvReg].mipsReg != mipsReg) { + ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg); + } + if ((mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY) { + // Mapping dirty means the old imm value is invalid. + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[riscvReg].isDirty = true; + // If reg is written to, pointerification is lost. + ar[riscvReg].pointerified = false; + ar[riscvReg].normalized32 = (mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32; + } else if ((mapFlags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32) { + ar[riscvReg].normalized32 = true; + } + + return mr[mipsReg].reg; + } else if (mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR) { + // Was mapped as pointer, now we want it mapped as a value, presumably to + // add or subtract stuff to it. + if ((mapFlags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { +#ifdef MASKED_PSP_MEMORY + _dbg_assert_(!ar[riscvReg].isDirty && (mapFlags & MAP_DIRTY) == 0); +#endif + emit_->SUB(riscvReg, riscvReg, MEMBASEREG); + } + mr[mipsReg].loc = MIPSLoc::RVREG; + if ((mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY) { + ar[riscvReg].isDirty = true; + } + // Let's always set this false, the SUB won't normalize. + ar[riscvReg].normalized32 = false; + return mr[mipsReg].reg; + } + + // Okay, not mapped, so we need to allocate an RV register. + RiscVReg reg = AllocateReg(); + if (reg != INVALID_REG) { + // Grab it, and load the value into it (if requested). + MapRegTo(reg, mipsReg, mapFlags); + } + + return reg; +} + +RiscVReg RiscVRegCache::MapRegAsPointer(IRRegIndex reg) { + _dbg_assert_(IsValidRegNoZero(reg)); + + // Already mapped. + if (mr[reg].loc == MIPSLoc::RVREG_AS_PTR) { + return mr[reg].reg; + } + + RiscVReg riscvReg = INVALID_REG; + if (mr[reg].loc != MIPSLoc::RVREG && mr[reg].loc != MIPSLoc::RVREG_IMM) { + riscvReg = MapReg(reg); + } else { + riscvReg = mr[reg].reg; + } + + if (mr[reg].loc == MIPSLoc::RVREG || mr[reg].loc == MIPSLoc::RVREG_IMM) { + // If there was an imm attached, discard it. + mr[reg].loc = MIPSLoc::RVREG; + if (!jo_->enablePointerify) { + // Convert to a pointer by adding the base and clearing off the top bits. + // If SP, we can probably avoid the top bit clear, let's play with that later. 
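// Either way the register ends up holding Memory::base + (u64)(u32)guest_addr: the guest
// value is zero-extended (top bits cleared) and the membase added, so the register can be
// used directly as a load/store base without going through MEMBASEREG each time. Whether
// the cache then tracks it as RVREG_AS_PTR (this branch) or just marks the existing RVREG
// pointerified (the branch below) is only bookkeeping. Conceptually (illustrative;
// AddMemBase below is the emitter-side version, using Zba's ADD_UW when available and
// SLLI/SRLI+ADD otherwise):
//
//   uintptr_t HostPointerSketch(uint32_t guestAddr) {
//       return (uintptr_t)Memory::base + (uintptr_t)guestAddr;
//   }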
+ AddMemBase(riscvReg); + mr[reg].loc = MIPSLoc::RVREG_AS_PTR; + } else if (!ar[riscvReg].pointerified) { + AddMemBase(riscvReg); + ar[riscvReg].pointerified = true; + } + ar[riscvReg].normalized32 = false; + } else { + ERROR_LOG(JIT, "MapRegAsPointer : MapReg failed to allocate a register?"); + } + return riscvReg; +} + +void RiscVRegCache::AddMemBase(RiscVGen::RiscVReg reg) { + _assert_(reg >= X0 && reg <= X31); +#ifdef MASKED_PSP_MEMORY + // This destroys the value... + _dbg_assert_(!ar[reg].isDirty); + emit_->SLLIW(reg, reg, 2); + emit_->SRLIW(reg, reg, 2); + emit_->ADD(reg, reg, MEMBASEREG); +#else + // Clear the top bits to be safe. + if (cpu_info.RiscV_Zba) { + emit_->ADD_UW(reg, reg, MEMBASEREG); + } else { + _assert_(XLEN == 64); + emit_->SLLI(reg, reg, 32); + emit_->SRLI(reg, reg, 32); + emit_->ADD(reg, reg, MEMBASEREG); + } +#endif + ar[reg].normalized32 = false; +} + +void RiscVRegCache::MapIn(IRRegIndex rs) { + MapReg(rs); +} + +void RiscVRegCache::MapInIn(IRRegIndex rd, IRRegIndex rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLock(rd, rs); +} + +void RiscVRegCache::MapDirtyIn(IRRegIndex rd, IRRegIndex rs, MapType type) { + SpillLock(rd, rs); + bool load = type == MapType::ALWAYS_LOAD || rd == rs; + MIPSMap norm32 = type == MapType::AVOID_LOAD_MARK_NORM32 ? MIPSMap::MARK_NORM32 : MIPSMap::INIT; + MapReg(rd, (load ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rs); + ReleaseSpillLock(rd, rs); +} + +void RiscVRegCache::MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, MapType type) { + SpillLock(rd, rs, rt); + bool load = type == MapType::ALWAYS_LOAD || (rd == rs || rd == rt); + MIPSMap norm32 = type == MapType::AVOID_LOAD_MARK_NORM32 ? MIPSMap::MARK_NORM32 : MIPSMap::INIT; + MapReg(rd, (load ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rt); + MapReg(rs); + ReleaseSpillLock(rd, rs, rt); +} + +void RiscVRegCache::MapDirtyDirtyIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, MapType type) { + SpillLock(rd1, rd2, rs); + bool load1 = type == MapType::ALWAYS_LOAD || rd1 == rs; + bool load2 = type == MapType::ALWAYS_LOAD || rd2 == rs; + MIPSMap norm32 = type == MapType::AVOID_LOAD_MARK_NORM32 ? MIPSMap::MARK_NORM32 : MIPSMap::INIT; + MapReg(rd1, (load1 ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rd2, (load2 ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rs); + ReleaseSpillLock(rd1, rd2, rs); +} + +void RiscVRegCache::MapDirtyDirtyInIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, IRRegIndex rt, MapType type) { + SpillLock(rd1, rd2, rs, rt); + bool load1 = type == MapType::ALWAYS_LOAD || (rd1 == rs || rd1 == rt); + bool load2 = type == MapType::ALWAYS_LOAD || (rd2 == rs || rd2 == rt); + MIPSMap norm32 = type == MapType::AVOID_LOAD_MARK_NORM32 ? MIPSMap::MARK_NORM32 : MIPSMap::INIT; + MapReg(rd1, (load1 ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rd2, (load2 ? MIPSMap::DIRTY : MIPSMap::NOINIT) | norm32); + MapReg(rt); + MapReg(rs); + ReleaseSpillLock(rd1, rd2, rs, rt); +} + +void RiscVRegCache::FlushRiscVReg(RiscVReg r) { + _dbg_assert_(r > X0 && r <= X31); + _dbg_assert_(ar[r].mipsReg != MIPS_REG_ZERO); + _dbg_assert_(!mr[ar[r].mipsReg].isStatic); + if (r == INVALID_REG) { + ERROR_LOG(JIT, "FlushRiscVReg called on invalid register %d", r); + return; + } + if (ar[r].mipsReg == IRREG_INVALID) { + // Nothing to do, reg not mapped. 
+ _dbg_assert_(!ar[r].isDirty); + return; + } + if (mr[ar[r].mipsReg].isStatic) { + ERROR_LOG(JIT, "Cannot FlushRiscVReg a statically mapped register"); + return; + } + auto &mreg = mr[ar[r].mipsReg]; + if (mreg.loc == MIPSLoc::RVREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) { + // We know its immediate value, no need to STR now. + mreg.loc = MIPSLoc::IMM; + mreg.reg = INVALID_REG; + } else { + if (mreg.loc == MIPSLoc::IMM || ar[r].isDirty) { + if (mreg.loc == MIPSLoc::RVREG_AS_PTR) { + // Unpointerify, in case dirty. +#ifdef MASKED_PSP_MEMORY + _dbg_assert_(!ar[r].isDirty); +#endif + emit_->SUB(r, r, MEMBASEREG); + mreg.loc = MIPSLoc::RVREG; + ar[r].normalized32 = false; + } + RiscVReg storeReg = RiscVRegForFlush(ar[r].mipsReg); + if (storeReg != INVALID_REG) + emit_->SW(storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + } + mreg.loc = MIPSLoc::MEM; + mreg.reg = INVALID_REG; + mreg.imm = -1; + } + ar[r].isDirty = false; + ar[r].mipsReg = IRREG_INVALID; + ar[r].pointerified = false; +} + +void RiscVRegCache::DiscardR(IRRegIndex mipsReg) { + _dbg_assert_(IsValidRegNoZero(mipsReg)); + if (mr[mipsReg].isStatic) { + // Simply do nothing unless it's an IMM/RVREG_IMM/RVREG_AS_PTR, in case we just switch it over to RVREG, losing the value. + RiscVReg riscvReg = mr[mipsReg].reg; + _dbg_assert_(riscvReg != INVALID_REG); + if (mipsReg == MIPS_REG_ZERO) { + // Shouldn't happen, but in case it does. + mr[mipsReg].loc = MIPSLoc::RVREG_IMM; + mr[mipsReg].reg = R_ZERO; + mr[mipsReg].imm = 0; + } else if (mr[mipsReg].loc == MIPSLoc::RVREG_IMM || mr[mipsReg].loc == MIPSLoc::IMM || mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR) { + // Ignore the imm value, restore sanity + mr[mipsReg].loc = MIPSLoc::RVREG; + ar[riscvReg].pointerified = false; + ar[riscvReg].isDirty = false; + ar[riscvReg].normalized32 = false; + } + return; + } + const MIPSLoc prevLoc = mr[mipsReg].loc; + if (prevLoc == MIPSLoc::RVREG || prevLoc == MIPSLoc::RVREG_IMM || prevLoc == MIPSLoc::RVREG_AS_PTR) { + RiscVReg riscvReg = mr[mipsReg].reg; + _dbg_assert_(riscvReg != INVALID_REG); + ar[riscvReg].mipsReg = IRREG_INVALID; + ar[riscvReg].pointerified = false; + ar[riscvReg].isDirty = false; + ar[riscvReg].normalized32 = false; + mr[mipsReg].reg = INVALID_REG; + mr[mipsReg].loc = MIPSLoc::MEM; + mr[mipsReg].imm = -1; + } + if (prevLoc == MIPSLoc::IMM && mipsReg != MIPS_REG_ZERO) { + mr[mipsReg].loc = MIPSLoc::MEM; + mr[mipsReg].imm = -1; + } +} + +RiscVReg RiscVRegCache::RiscVRegForFlush(IRRegIndex r) { + _dbg_assert_(IsValidReg(r)); + if (mr[r].isStatic) + return INVALID_REG; // No flushing needed + + switch (mr[r].loc) { + case MIPSLoc::IMM: + if (r == MIPS_REG_ZERO) { + return INVALID_REG; + } + // Zero is super easy. + if (mr[r].imm == 0) { + return R_ZERO; + } + // Could we get lucky? Check for an exact match in another rvreg. + for (int i = 0; i < NUM_MIPSREG; ++i) { + if (mr[i].loc == MIPSLoc::RVREG_IMM && mr[i].imm == mr[r].imm) { + // Awesome, let's just store this reg. + return mr[i].reg; + } + } + return INVALID_REG; + + case MIPSLoc::RVREG: + case MIPSLoc::RVREG_IMM: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG_REPORT(JIT, "RiscVRegForFlush: MipsReg %d had bad riscvReg", r); + return INVALID_REG; + } + // No need to flush if it's zero or not dirty. + if (r == MIPS_REG_ZERO || !ar[mr[r].reg].isDirty) { + return INVALID_REG; + } + // TODO: Lo/hi optimization? 
+ return mr[r].reg; + + case MIPSLoc::RVREG_AS_PTR: + return INVALID_REG; + + case MIPSLoc::MEM: + return INVALID_REG; + + default: + ERROR_LOG_REPORT(JIT, "RiscVRegForFlush: MipsReg %d with invalid location %d", r, (int)mr[r].loc); + return INVALID_REG; + } +} + +void RiscVRegCache::FlushR(IRRegIndex r) { + _dbg_assert_(IsValidRegNoZero(r)); + if (mr[r].isStatic) { + ERROR_LOG(JIT, "Cannot flush static reg %d", r); + return; + } + + switch (mr[r].loc) { + case MIPSLoc::IMM: + // IMM is always "dirty". + // TODO: HI/LO optimization? + if (r != MIPS_REG_ZERO) { + // Try to optimize using a different reg. + RiscVReg storeReg = RiscVRegForFlush(r); + if (storeReg == INVALID_REG) { + SetRegImm(SCRATCH1, mr[r].imm); + storeReg = SCRATCH1; + } + emit_->SW(storeReg, CTXREG, GetMipsRegOffset(r)); + } + break; + + case MIPSLoc::RVREG: + case MIPSLoc::RVREG_IMM: + if (ar[mr[r].reg].isDirty) { + RiscVReg storeReg = RiscVRegForFlush(r); + if (storeReg != INVALID_REG) { + emit_->SW(storeReg, CTXREG, GetMipsRegOffset(r)); + } + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = IRREG_INVALID; + ar[mr[r].reg].pointerified = false; + break; + + case MIPSLoc::RVREG_AS_PTR: + if (ar[mr[r].reg].isDirty) { +#ifdef MASKED_PSP_MEMORY + // This is kinda bad, because we've cleared bits in it. + _dbg_assert_(!ar[mr[r].reg].isDirty); +#endif + emit_->SUB(mr[r].reg, mr[r].reg, MEMBASEREG); + // We set this so RiscVRegForFlush knows it's no longer a pointer. + mr[r].loc = MIPSLoc::RVREG; + RiscVReg storeReg = RiscVRegForFlush(r); + if (storeReg != INVALID_REG) { + emit_->SW(storeReg, CTXREG, GetMipsRegOffset(r)); + } + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = IRREG_INVALID; + break; + + case MIPSLoc::MEM: + // Already there, nothing to do. + break; + + default: + ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d with invalid location %d", r, (int)mr[r].loc); + break; + } + if (r == MIPS_REG_ZERO) { + mr[r].loc = MIPSLoc::RVREG_IMM; + mr[r].reg = R_ZERO; + mr[r].imm = 0; + } else { + mr[r].loc = MIPSLoc::MEM; + mr[r].reg = INVALID_REG; + mr[r].imm = -1; + } +} + +void RiscVRegCache::FlushAll() { + // Note: make sure not to change the registers when flushing: + // Branching code expects the armreg to retain its value. + + // TODO: HI/LO optimization? + + // Final pass to grab any that were left behind. + for (int i = 1; i < NUM_MIPSREG; i++) { + IRRegIndex mipsReg = IRRegIndex(i); + if (mr[i].isStatic) { + RiscVReg riscvReg = mr[i].reg; + // Cannot leave any IMMs in registers, not even ML_ARMREG_IMM, can confuse the regalloc later if this flush is mid-block + // due to an interpreter fallback that changes the register. + if (mr[i].loc == MIPSLoc::IMM) { + SetRegImm(mr[i].reg, mr[i].imm); + mr[i].loc = MIPSLoc::RVREG; + ar[riscvReg].pointerified = false; + } else if (mr[i].loc == MIPSLoc::RVREG_IMM) { + // The register already contains the immediate. + if (ar[riscvReg].pointerified) { + ERROR_LOG(JIT, "RVREG_IMM but pointerified. 
Wrong."); + ar[riscvReg].pointerified = false; + } + mr[i].loc = MIPSLoc::RVREG; + } else if (mr[i].loc == MIPSLoc::RVREG_AS_PTR) { +#ifdef MASKED_PSP_MEMORY + _dbg_assert_(!ar[riscvReg].isDirty); +#endif + emit_->SUB(riscvReg, riscvReg, MEMBASEREG); + mr[i].loc = MIPSLoc::RVREG; + } + if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { + ERROR_LOG(JIT, "RV reg of static %i is invalid", i); + continue; + } + } else if (IsValidRegNoZero(mipsReg)) { + FlushR(mipsReg); + } + } + + int count = 0; + const StaticAllocation *allocs = GetStaticAllocations(count); + for (int i = 0; i < count; i++) { + if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified && jo_->enablePointerify) { + // Re-pointerify + _dbg_assert_(mr[allocs[i].mr].loc == MIPSLoc::RVREG); + AddMemBase(allocs[i].ar); + ar[allocs[i].ar].pointerified = true; + } else if (!allocs[i].pointerified) { + // If this register got pointerified on the way, mark it as not. + // This is so that after save/reload (like in an interpreter fallback), + // it won't be regarded as such, as it may no longer be. + ar[allocs[i].ar].pointerified = false; + } + } + // Sanity check + for (int i = 0; i < NUM_RVREG; i++) { + if (ar[i].mipsReg != IRREG_INVALID && mr[ar[i].mipsReg].isStatic == false) { + ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } +} + +void RiscVRegCache::SetImm(IRRegIndex r, u64 immVal) { + _dbg_assert_(IsValidReg(r)); + if (r == MIPS_REG_ZERO && immVal != 0) { + ERROR_LOG_REPORT(JIT, "Trying to set immediate %08x to r0", (u32)immVal); + return; + } + + if (mr[r].loc == MIPSLoc::RVREG_IMM && mr[r].imm == immVal) { + // Already have that value, let's keep it in the reg. + return; + } + + // TODO: HI/LO optimization? + // All regs on the PSP are 32 bit, but LO we treat as HI:LO so is 64 full bits. + immVal = immVal & 0xFFFFFFFF; + + if (mr[r].isStatic) { + mr[r].loc = MIPSLoc::IMM; + mr[r].imm = immVal; + ar[mr[r].reg].pointerified = false; + ar[mr[r].reg].normalized32 = false; + // We do not change reg to INVALID_REG for obvious reasons.. + } else { + // Zap existing value if cached in a reg + if (mr[r].reg != INVALID_REG) { + ar[mr[r].reg].mipsReg = IRREG_INVALID; + ar[mr[r].reg].isDirty = false; + ar[mr[r].reg].pointerified = false; + ar[mr[r].reg].normalized32 = false; + } + mr[r].loc = MIPSLoc::IMM; + mr[r].imm = immVal; + mr[r].reg = INVALID_REG; + } +} + +bool RiscVRegCache::IsImm(IRRegIndex r) const { + _dbg_assert_(IsValidReg(r)); + if (r == MIPS_REG_ZERO) + return true; + else + return mr[r].loc == MIPSLoc::IMM || mr[r].loc == MIPSLoc::RVREG_IMM; +} + +bool RiscVRegCache::IsPureImm(IRRegIndex r) const { + _dbg_assert_(IsValidReg(r)); + if (r == MIPS_REG_ZERO) + return true; + else + return mr[r].loc == MIPSLoc::IMM; +} + +u64 RiscVRegCache::GetImm(IRRegIndex r) const { + _dbg_assert_(IsValidReg(r)); + if (r == MIPS_REG_ZERO) + return 0; + if (mr[r].loc != MIPSLoc::IMM && mr[r].loc != MIPSLoc::RVREG_IMM) { + ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r); + } + return mr[r].imm; +} + +int RiscVRegCache::GetMipsRegOffset(IRRegIndex r) { + _dbg_assert_(IsValidReg(r)); + return r * 4; +} + +bool RiscVRegCache::IsValidReg(IRRegIndex r) const { + if (r < 0 || r >= NUM_MIPSREG) + return false; + + // See MIPSState for these offsets. + + // Don't allow FPU or VFPU regs here. + if (r >= 32 && r < 32 + 32 + 128) + return false; + // Also disallow VFPU temps. + if (r >= 224 && r < 224 + 16) + return false; + // Don't allow nextPC, etc. since it's probably a mistake. 
+ if (r > 245) + return false; + // Don't allow PC either. + if (r == 241) + return false; + + return true; +} + +bool RiscVRegCache::IsValidRegNoZero(IRRegIndex r) const { + return IsValidReg(r) && r != MIPS_REG_ZERO; +} + +void RiscVRegCache::SpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRRegIndex r4) { + _dbg_assert_(IsValidReg(r1)); + _dbg_assert_(r2 == IRREG_INVALID || IsValidReg(r2)); + _dbg_assert_(r3 == IRREG_INVALID || IsValidReg(r3)); + _dbg_assert_(r4 == IRREG_INVALID || IsValidReg(r4)); + mr[r1].spillLock = true; + if (r2 != IRREG_INVALID) mr[r2].spillLock = true; + if (r3 != IRREG_INVALID) mr[r3].spillLock = true; + if (r4 != IRREG_INVALID) mr[r4].spillLock = true; +} + +void RiscVRegCache::ReleaseSpillLocksAndDiscardTemps() { + for (int i = 0; i < NUM_MIPSREG; i++) { + if (!mr[i].isStatic) + mr[i].spillLock = false; + } + for (int i = 0; i < NUM_RVREG; i++) { + ar[i].tempLocked = false; + } +} + +void RiscVRegCache::ReleaseSpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRRegIndex r4) { + _dbg_assert_(IsValidReg(r1)); + _dbg_assert_(r2 == IRREG_INVALID || IsValidReg(r2)); + _dbg_assert_(r3 == IRREG_INVALID || IsValidReg(r3)); + _dbg_assert_(r4 == IRREG_INVALID || IsValidReg(r4)); + if (!mr[r1].isStatic) + mr[r1].spillLock = false; + if (r2 != IRREG_INVALID && !mr[r2].isStatic) + mr[r2].spillLock = false; + if (r3 != IRREG_INVALID && !mr[r3].isStatic) + mr[r3].spillLock = false; + if (r4 != IRREG_INVALID && !mr[r4].isStatic) + mr[r4].spillLock = false; +} + +RiscVReg RiscVRegCache::R(IRRegIndex mipsReg) { + _dbg_assert_(IsValidReg(mipsReg)); + _dbg_assert_(mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM); + if (mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM) { + return mr[mipsReg].reg; + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in riscv reg", mipsReg); + return INVALID_REG; // BAAAD + } +} + +RiscVReg RiscVRegCache::RPtr(IRRegIndex mipsReg) { + _dbg_assert_(IsValidReg(mipsReg)); + _dbg_assert_(mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM || mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR); + if (mr[mipsReg].loc == MIPSLoc::RVREG_AS_PTR) { + return mr[mipsReg].reg; + } else if (mr[mipsReg].loc == MIPSLoc::RVREG || mr[mipsReg].loc == MIPSLoc::RVREG_IMM) { + int rv = mr[mipsReg].reg; + _dbg_assert_(ar[rv].pointerified); + if (ar[rv].pointerified) { + return mr[mipsReg].reg; + } else { + ERROR_LOG(JIT, "Tried to use a non-pointer register as a pointer"); + return INVALID_REG; + } + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in riscv reg", mipsReg); + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/RiscV/RiscVRegCache.h b/Core/MIPS/RiscV/RiscVRegCache.h new file mode 100644 index 0000000000..3984748318 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVRegCache.h @@ -0,0 +1,191 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
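// Typical use of the cache declared in this header, condensed from what MapDirtyInIn in
// RiscVRegCache.cpp does: spill-lock everything about to be mapped so one MapReg cannot
// evict another, map with the right flags, then release. Note that MIPSMap::NOINIT is
// defined below as 2 | DIRTY, so "no init" always implies "dirty" and tests are written
// as (flags & MIPSMap::DIRTY) == MIPSMap::DIRTY. Hypothetical standalone helper (assumes
// the usual `using namespace RiscVJitConstants;` from the compiler files):
static void MapThreeOperandsSketch(RiscVRegCache &gpr, IRInst inst) {
	gpr.SpillLock(inst.dest, inst.src1, inst.src2);
	// A destination that aliases a source must still be loaded; otherwise skip the load.
	bool aliased = inst.dest == inst.src1 || inst.dest == inst.src2;
	RiscVGen::RiscVReg dst = gpr.MapReg(inst.dest, aliased ? MIPSMap::DIRTY : MIPSMap::NOINIT);
	RiscVGen::RiscVReg rhs = gpr.MapReg(inst.src2);
	RiscVGen::RiscVReg lhs = gpr.MapReg(inst.src1);
	gpr.ReleaseSpillLock(inst.dest, inst.src1, inst.src2);
	// ...emit code reading lhs/rhs and writing dst...
	(void)lhs; (void)rhs; (void)dst;
}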
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Common/RiscVEmitter.h" +#include "Core/MIPS/MIPS.h" + +namespace RiscVJitConstants { + +// Note: we don't support 32-bit or 128-bit CPUs currently. +constexpr int XLEN = 64; + +const RiscVGen::RiscVReg DOWNCOUNTREG = RiscVGen::X24; +const RiscVGen::RiscVReg JITBASEREG = RiscVGen::X25; +const RiscVGen::RiscVReg CTXREG = RiscVGen::X26; +const RiscVGen::RiscVReg MEMBASEREG = RiscVGen::X27; +// TODO: Experiment. X7-X13 are compressed regs. X8/X9 are saved so nice for static alloc, though. +const RiscVGen::RiscVReg SCRATCH1 = RiscVGen::X10; +const RiscVGen::RiscVReg SCRATCH2 = RiscVGen::X11; + +// Have to account for all of them due to temps, etc. +constexpr int TOTAL_MAPPABLE_MIPSREGS = 256; + +enum class MIPSLoc { + IMM, + RVREG, + // In a native reg, but an adjusted pointer (not pointerified - unaligned.) + RVREG_AS_PTR, + // In a native reg, but also has a known immediate value. + RVREG_IMM, + MEM, +}; + +// Initing is the default so the flag is reversed. +enum class MIPSMap { + INIT = 0, + DIRTY = 1, + NOINIT = 2 | DIRTY, + MARK_NORM32 = 4, +}; +static inline MIPSMap operator |(const MIPSMap &lhs, const MIPSMap &rhs) { + return MIPSMap((int)lhs | (int)rhs); +} +static inline MIPSMap operator &(const MIPSMap &lhs, const MIPSMap &rhs) { + return MIPSMap((int)lhs & (int)rhs); +} + +enum class MapType { + AVOID_LOAD, + AVOID_LOAD_MARK_NORM32, + ALWAYS_LOAD, +}; + +} // namespace RiscVJitConstants + +namespace MIPSAnalyst { +struct AnalysisResults; +}; + +namespace MIPSComp { +struct JitOptions; +} + +// Not using IRReg since this can be -1. +typedef int IRRegIndex; +constexpr IRRegIndex IRREG_INVALID = -1; + +struct RegStatusRiscV { + IRRegIndex mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? + bool pointerified; // Has added the memory base into the top part of the reg. Note - still usable as 32-bit reg (in only some cases.) + bool tempLocked; // Reserved for a temp register. + bool normalized32; // 32 bits sign extended to XLEN. RISC-V can't always address just the low 32-bits, so this matters. +}; + +struct RegStatusMIPS { + // Where is this MIPS register? + RiscVJitConstants::MIPSLoc loc; + // Data (both or only one may be used, depending on loc.) + u64 imm; + RiscVGen::RiscVReg reg; // reg index + bool spillLock; // if true, this register cannot be spilled. + bool isStatic; // if true, this register will not be written back to ram by the regcache + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +class RiscVRegCache { +public: + RiscVRegCache(MIPSState *mipsState, MIPSComp::JitOptions *jo); + ~RiscVRegCache() {} + + void Init(RiscVGen::RiscVEmitter *emitter); + // TODO: Maybe pass in IR block and start PC for logging/debugging? + void Start(); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. 
+ void SpillLock(IRRegIndex reg, IRRegIndex reg2 = IRREG_INVALID, IRRegIndex reg3 = IRREG_INVALID, IRRegIndex reg4 = IRREG_INVALID); + void ReleaseSpillLock(IRRegIndex reg, IRRegIndex reg2 = IRREG_INVALID, IRRegIndex reg3 = IRREG_INVALID, IRRegIndex reg4 = IRREG_INVALID); + void ReleaseSpillLocksAndDiscardTemps(); + + void SetImm(IRRegIndex reg, u64 immVal); + bool IsImm(IRRegIndex reg) const; + bool IsPureImm(IRRegIndex reg) const; + u64 GetImm(IRRegIndex reg) const; + // Optimally set a register to an imm value (possibly using another register.) + void SetRegImm(RiscVGen::RiscVReg reg, u64 imm); + + // May fail and return INVALID_REG if it needs flushing. + RiscVGen::RiscVReg TryMapTempImm(IRRegIndex); + + // Returns a RISC-V register containing the requested MIPS register. + RiscVGen::RiscVReg MapReg(IRRegIndex reg, RiscVJitConstants::MIPSMap mapFlags = RiscVJitConstants::MIPSMap::INIT); + RiscVGen::RiscVReg MapRegAsPointer(IRRegIndex reg); + + bool IsMapped(IRRegIndex reg); + bool IsMappedAsPointer(IRRegIndex reg); + bool IsMappedAsStaticPointer(IRRegIndex reg); + bool IsInRAM(IRRegIndex reg); + bool IsNormalized32(IRRegIndex reg); + + void MarkDirty(RiscVGen::RiscVReg reg, bool andNormalized32 = false); + void MarkPtrDirty(RiscVGen::RiscVReg reg); + // Copies to another reg if specified, otherwise same reg. + RiscVGen::RiscVReg Normalize32(IRRegIndex reg, RiscVGen::RiscVReg destReg = RiscVGen::INVALID_REG); + void MapIn(IRRegIndex rs); + void MapInIn(IRRegIndex rd, IRRegIndex rs); + void MapDirtyIn(IRRegIndex rd, IRRegIndex rs, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD); + void MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD); + void MapDirtyDirtyIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD); + void MapDirtyDirtyInIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, IRRegIndex rt, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD); + void FlushRiscVReg(RiscVGen::RiscVReg r); + void FlushBeforeCall(); + void FlushAll(); + void FlushR(IRRegIndex r); + void DiscardR(IRRegIndex r); + + RiscVGen::RiscVReg GetAndLockTempR(); + + RiscVGen::RiscVReg R(IRRegIndex preg); // Returns a cached register, while checking that it's NOT mapped as a pointer + RiscVGen::RiscVReg RPtr(IRRegIndex preg); // Returns a cached register, if it has been mapped as a pointer + + // These are called once on startup to generate functions that you should then call. + void EmitLoadStaticRegisters(); + void EmitSaveStaticRegisters(); + +private: + struct StaticAllocation { + IRRegIndex mr; + RiscVGen::RiscVReg ar; + bool pointerified; + }; + const StaticAllocation *GetStaticAllocations(int &count); + const RiscVGen::RiscVReg *GetMIPSAllocationOrder(int &count); + void MapRegTo(RiscVGen::RiscVReg reg, IRRegIndex mipsReg, RiscVJitConstants::MIPSMap mapFlags); + RiscVGen::RiscVReg AllocateReg(); + RiscVGen::RiscVReg FindBestToSpill(bool unusedOnly, bool *clobbered); + RiscVGen::RiscVReg RiscVRegForFlush(IRRegIndex r); + void AddMemBase(RiscVGen::RiscVReg reg); + int GetMipsRegOffset(IRRegIndex r); + + bool IsValidReg(IRRegIndex r) const; + bool IsValidRegNoZero(IRRegIndex r) const; + + MIPSState *mips_; + RiscVGen::RiscVEmitter *emit_ = nullptr; + MIPSComp::JitOptions *jo_; + + enum { + NUM_RVREG = 32, // 31 actual registers, plus the zero register which is not mappable.
+ NUM_MIPSREG = RiscVJitConstants::TOTAL_MAPPABLE_MIPSREGS, + }; + + RegStatusRiscV ar[NUM_RVREG]{}; + RegStatusMIPS mr[NUM_MIPSREG]{}; +}; diff --git a/Core/MIPS/RiscV/RiscVRegCacheFPU.cpp b/Core/MIPS/RiscV/RiscVRegCacheFPU.cpp new file mode 100644 index 0000000000..7287ea10b7 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVRegCacheFPU.cpp @@ -0,0 +1,16 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. diff --git a/Core/MIPS/RiscV/RiscVRegCacheFPU.h b/Core/MIPS/RiscV/RiscVRegCacheFPU.h new file mode 100644 index 0000000000..48b5ab53e0 --- /dev/null +++ b/Core/MIPS/RiscV/RiscVRegCacheFPU.h @@ -0,0 +1,18 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once diff --git a/Core/MIPS/fake/FakeJit.cpp b/Core/MIPS/fake/FakeJit.cpp index f833526a66..3e696814c8 100644 --- a/Core/MIPS/fake/FakeJit.cpp +++ b/Core/MIPS/fake/FakeJit.cpp @@ -153,10 +153,6 @@ void FakeJit::Comp_RunBlock(MIPSOpcode op) ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); } -bool FakeJit::ReplaceJalTo(u32 dest) { - return true; -} - void FakeJit::Comp_ReplacementFunc(MIPSOpcode op) { } diff --git a/Core/MIPS/fake/FakeJit.h b/Core/MIPS/fake/FakeJit.h index 44cfefec05..7fe42a6981 100644 --- a/Core/MIPS/fake/FakeJit.h +++ b/Core/MIPS/fake/FakeJit.h @@ -162,8 +162,6 @@ private: void MovFromPC(FakeReg r); void MovToPC(FakeReg r); - bool ReplaceJalTo(u32 dest); - void SaveDowncount(); void RestoreDowncount(); diff --git a/Core/MemFault.cpp b/Core/MemFault.cpp index fe413d6a26..95a7088ae3 100644 --- a/Core/MemFault.cpp +++ b/Core/MemFault.cpp @@ -263,10 +263,15 @@ bool HandleFault(uintptr_t hostAddress, void *ctx) { #endif Core_ExecException(targetAddr, currentMIPS->pc, ExecExceptionType::JUMP); // Redirect execution to a crash handler that will switch to CoreState::CORE_RUNTIME_ERROR immediately. 
- context->CTX_PC = (uintptr_t)MIPSComp::jit->GetCrashHandler(); - ERROR_LOG(MEMMAP, "Bad execution access detected, halting: %08x (last known pc %08x, host: %p)", targetAddr, currentMIPS->pc, (void *)hostAddress); - inCrashHandler = false; - return true; + uintptr_t crashHandler = (uintptr_t)MIPSComp::jit->GetCrashHandler(); + if (crashHandler != 0) { + context->CTX_PC = crashHandler; + ERROR_LOG(MEMMAP, "Bad execution access detected, halting: %08x (last known pc %08x, host: %p)", targetAddr, currentMIPS->pc, (void *)hostAddress); + inCrashHandler = false; + return true; + } + + type = MemoryExceptionType::UNKNOWN; } else if (success) { if (info.isMemoryWrite) { type = MemoryExceptionType::WRITE_WORD; @@ -303,8 +308,11 @@ bool HandleFault(uintptr_t hostAddress, void *ctx) { g_lastCrashAddress = codePtr; // Redirect execution to a crash handler that will switch to CoreState::CORE_RUNTIME_ERROR immediately. + uintptr_t crashHandler = 0; if (MIPSComp::jit) - context->CTX_PC = (uintptr_t)MIPSComp::jit->GetCrashHandler(); + crashHandler = (uintptr_t)MIPSComp::jit->GetCrashHandler(); + if (crashHandler != 0) + context->CTX_PC = crashHandler; else handled = false; ERROR_LOG(MEMMAP, "Bad memory access detected! %08x (%p) Stopping emulation. Info:\n%s", guestAddress, (void *)hostAddress, infoString.c_str()); diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 6c7e336ad3..18f887de38 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -1003,7 +1003,7 @@ void JitCompareScreen::UpdateDisasm() { snprintf(temp, sizeof(temp), "%i/%i", currentBlock_, blockCacheDebug->GetNumBlocks()); blockName_->SetText(temp); - if (currentBlock_ < 0 || currentBlock_ >= blockCacheDebug->GetNumBlocks()) { + if (currentBlock_ < 0 || !blockCacheDebug || currentBlock_ >= blockCacheDebug->GetNumBlocks()) { leftDisasm_->Add(new TextView(dev->T("No block"))); rightDisasm_->Add(new TextView(dev->T("No block"))); blockStats_->SetText(""); @@ -1067,6 +1067,9 @@ UI::EventReturn JitCompareScreen::OnShowStats(UI::EventParams &e) { } JitBlockCacheDebugInterface *blockCache = MIPSComp::jit->GetBlockCacheDebugInterface(); + if (!blockCache) + return UI::EVENT_DONE; + BlockCacheStats bcStats; blockCache->ComputeStats(bcStats); NOTICE_LOG(JIT, "Num blocks: %i", bcStats.numBlocks); diff --git a/unittest/JitHarness.cpp b/unittest/JitHarness.cpp index 44f0cdf751..750b39ab57 100644 --- a/unittest/JitHarness.cpp +++ b/unittest/JitHarness.cpp @@ -178,26 +178,18 @@ bool TestJit() { jit_speed = ExecCPUTest(); // Disassemble - JitBlockCache *cache = MIPSComp::jit->GetBlockCache(); - JitBlock *block = cache->GetBlock(0); // Should only be one block. -#if PPSSPP_ARCH(ARM) - std::vector lines = DisassembleArm2(block->normalEntry, block->codeSize); -#elif PPSSPP_ARCH(ARM64) - std::vector lines = DisassembleArm64(block->normalEntry, block->codeSize); -#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) - std::vector lines = DisassembleX86(block->normalEntry, block->codeSize); -#elif PPSSPP_ARCH(RISCV64) - std::vector lines = DisassembleRV64(block->normalEntry, block->codeSize); -#else - std::vector lines; -#endif - // Cut off at 25 due to the repetition above. Might need tweaking for large instructions. - const int cutoff = 25; - for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) { - printf("%s\n", lines[i].c_str()); + JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface(); + if (cache) { + JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); // Should only be one block. 
+ std::vector<std::string> &lines = block.targetDisasm; + // Cut off at 25 due to the repetition above. Might need tweaking for large instructions. + const int cutoff = 25; + for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) { + printf("%s\n", lines[i].c_str()); + } + if (lines.size() > cutoff) + printf("...\n"); + } - if (lines.size() > cutoff) - printf("...\n"); printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed); }
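For illustration, not part of the patch: MIPSMap in RiscVRegCache.h above is a small flag set rather than a plain enum, which is why NOINIT is defined as 2 | DIRTY and why operator | and operator & are overloaded. A self-contained sketch, reusing the same values as the header, of how the flags combine when mapping a destination register that will be fully overwritten:

#include <cstdio>

enum class MIPSMap { INIT = 0, DIRTY = 1, NOINIT = 2 | DIRTY, MARK_NORM32 = 4 };
static inline MIPSMap operator |(const MIPSMap &lhs, const MIPSMap &rhs) { return MIPSMap((int)lhs | (int)rhs); }
static inline MIPSMap operator &(const MIPSMap &lhs, const MIPSMap &rhs) { return MIPSMap((int)lhs & (int)rhs); }

int main() {
	// Destination will be overwritten: no load needed, but it must be written back,
	// and the result is marked as a properly sign-extended 32-bit value.
	MIPSMap flags = MIPSMap::NOINIT | MIPSMap::MARK_NORM32;
	bool dirty = (flags & MIPSMap::DIRTY) == MIPSMap::DIRTY;               // true: NOINIT includes DIRTY
	bool norm32 = (flags & MIPSMap::MARK_NORM32) == MIPSMap::MARK_NORM32;  // true
	printf("dirty=%d norm32=%d\n", dirty, norm32);
	return 0;
}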
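For illustration, not part of the patch: the normalized32 flag in RegStatusRiscV exists because RV64 holds 32-bit guest values in 64-bit host registers. W-form instructions (ADDW, SEXT.W, and friends) sign-extend their 32-bit result, but a register whose upper 32 bits are stale is not safe to feed into 64-bit compares or address arithmetic. A small sketch of the difference, with Normalize32 standing in for the sign-extension the cache would emit:

#include <cstdint>
#include <cstdio>

// Equivalent of RISC-V sext.w (addiw rd, rs, 0): keep the low 32 bits, sign-extend to 64.
static int64_t Normalize32(uint64_t raw) {
	return (int64_t)(int32_t)raw;
}

int main() {
	// Guest value is -1, but the host register's upper 32 bits happen to be zero.
	uint64_t raw = 0x00000000FFFFFFFFull;
	printf("64-bit compare on raw value:      %d\n", (int64_t)raw < 0);      // 0 -- wrong for 32-bit semantics
	printf("64-bit compare after normalizing: %d\n", Normalize32(raw) < 0);  // 1 -- matches 32-bit semantics
	return 0;
}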
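For illustration, not part of the patch: the pointerified flag and AddMemBase() in RiscVRegCache.h appear to follow the same trick the ARM64 register cache uses. Assuming the host mapping of guest RAM has its low 32 bits clear, a register holding a 32-bit guest address can have the base folded into its upper bits, so the same register serves both as the guest address and as a host pointer for loads and stores. The base value and names below are made up for the sketch:

#include <cstdint>
#include <cstdio>

int main() {
	// Hypothetical host mapping of guest RAM; assumed 4 GB aligned (low 32 bits zero).
	uint64_t membase = 0x00007F0000000000ull;
	uint32_t guestAddr = 0x08804000;  // a typical PSP RAM address

	// Conceptually what adding the memory base into the top part of the reg does:
	uint64_t pointerified = membase + guestAddr;

	// Low 32 bits are still the guest address; the full value is a usable host pointer.
	printf("host ptr = 0x%016llx, guest addr = 0x%08x\n",
	       (unsigned long long)pointerified, (uint32_t)pointerified);
	return 0;
}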