mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-25 01:00:01 +00:00
9c73272604
mips: Minor tweaks to the mips emitter
331 lines
11 KiB
C++
331 lines
11 KiB
C++
// Copyright (c) 2014- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once
|
|
// Symbian can't build this due to an old gcc/lib combination, and doesn't need to.
|
|
// Kind programmer, if you want to translate this to a proper feature-detection
|
|
// define, please feel free to.
|
|
#ifndef __SYMBIAN32__
|
|
|
|
#include <functional>
|
|
#include <vector>
|
|
#include <stdint.h>
|
|
|
|
#include "Common.h"
|
|
#include "MsgHandler.h"
|
|
|
|
namespace MIPSGen {
|
|
|
|
// Register identifiers used by the emitter.
// Values 0-31 name the general purpose registers; values 32-63 (F_BASE + n) name
// the floating point registers F0-F31.
enum MIPSReg {
	R_ZERO = 0,
	R_AT = 1,
	V0 = 2, V1 = 3,

	A0 = 4, A1 = 5, A2 = 6, A3 = 7, A4 = 8, A5 = 9, A6 = 10, A7 = 11,
	// Alternate names depending on ABI.  These share their numbers with A4-A7.
	T0 = 8, T1 = 9, T2 = 10, T3 = 11,

	T4 = 12, T5, T6, T7,
	S0 = 16, S1, S2, S3, S4, S5, S6, S7,
	T8 = 24, T9,
	K0 = 26, K1,
	R_GP = 28, R_SP, R_FP,
	R_RA = 31,

	// First floating point register; F0 is an alias for it.
	F_BASE = 32,
	F0 = 32, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,
	F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,

	// Sentinel that does not name any register.
	INVALID_REG = 0xFFFFFFFF
};
|
|
|
|
// Register counts.
enum {
	// All 32 except: ZERO, K0/K1 (kernel), RA. The rest are only convention.
	NUMGPRs = 32 - 1 - 2 - 1,
	// Every floating point register is counted.
	NUMFPRs = 32,
};
|
|
|
|
// Kind of immediate field a pending branch/jump needs patched.
enum FixupBranchType {
	// 16-bit immediate jump/branch (to pc + (simm16 + 1) ops.)
	BRANCH_16,
	// 26-bit immediate jump/branch (to pc's 4 high bits + imm * 4.)
	BRANCH_26,
};
|
|
|
|
// Beware of delay slots.
|
|
struct FixupBranch {
|
|
u8 *ptr;
|
|
FixupBranchType type;
|
|
};
|
|
|
|
class MIPSEmitter {
|
|
public:
|
|
MIPSEmitter() : code_(0), lastCacheFlushEnd_(0) {
|
|
}
|
|
MIPSEmitter(u8 *code_ptr) : code_(code_ptr), lastCacheFlushEnd_(code_ptr) {
|
|
SetCodePtr(code_ptr);
|
|
}
|
|
virtual ~MIPSEmitter() {
|
|
}
|
|
|
|
void SetCodePtr(u8 *ptr);
|
|
void ReserveCodeSpace(u32 bytes);
|
|
const u8 *AlignCode16();
|
|
const u8 *AlignCodePage();
|
|
const u8 *GetCodePtr() const;
|
|
u8 *GetWritableCodePtr();
|
|
void FlushIcache();
|
|
void FlushIcacheSection(u8 *start, u8 *end);
|
|
|
|
// 20 bits valid in code.
|
|
void BREAK(u32 code);
|
|
|
|
void NOP() {
|
|
SLL(R_ZERO, R_ZERO, 0);
|
|
}
|
|
|
|
// Note for all branches and jumps:
|
|
// MIPS has DELAY SLOTS. This emitter makes it so if you forget that, you'll be safe.
|
|
// If you want to run something inside a delay slot, emit the instruction inside a closure.
|
|
//
|
|
// Example: Translates to:
|
|
// J(&myFunc); J(&myFunc);
|
|
// ADDU(V0, V0, V1); NOP();
|
|
// ADDU(V0, V0, V1);
|
|
//
|
|
// J(&myFunc, [&] { J(&myFunc);
|
|
// ADDU(V0, V0, V1); ADDU(V0, V0, V1);
|
|
// });
|
|
//
|
|
// This applies to all J*() and B*() functions (except BREAK(), which is not a branch func.)
|
|
|
|
FixupBranch J(std::function<void ()> delaySlot = nullptr);
|
|
void J(const void *func, std::function<void ()> delaySlot = nullptr);
|
|
FixupBranch JAL(std::function<void ()> delaySlot = nullptr);
|
|
void JAL(const void *func, std::function<void ()> delaySlot = nullptr);
|
|
void JR(MIPSReg rs, std::function<void ()> delaySlot = nullptr);
|
|
void JRRA(std::function<void ()> delaySlot = nullptr) {
|
|
JR(R_RA, delaySlot);
|
|
}
|
|
void JALR(MIPSReg rd, MIPSReg rs, std::function<void ()> delaySlot = nullptr);
|
|
void JALR(MIPSReg rs, std::function<void ()> delaySlot = nullptr) {
|
|
JALR(R_RA, rs, delaySlot);
|
|
}
|
|
|
|
inline FixupBranch B(std::function<void ()> delaySlot = nullptr) {
|
|
return BEQ(R_ZERO, R_ZERO, delaySlot);
|
|
}
|
|
inline void B(const void *func, std::function<void ()> delaySlot = nullptr) {
|
|
return BEQ(R_ZERO, R_ZERO, func, delaySlot);
|
|
}
|
|
FixupBranch BLTZ(MIPSReg rs, std::function<void ()> delaySlot = nullptr);
|
|
void BLTZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot = nullptr);
|
|
FixupBranch BEQ(MIPSReg rs, MIPSReg rt, std::function<void ()> delaySlot = nullptr);
|
|
void BEQ(MIPSReg rs, MIPSReg rt, const void *func, std::function<void ()> delaySlot = nullptr);
|
|
FixupBranch BNE(MIPSReg rs, MIPSReg rt, std::function<void ()> delaySlot = nullptr);
|
|
void BNE(MIPSReg rs, MIPSReg rt, const void *func, std::function<void ()> delaySlot = nullptr);
|
|
inline FixupBranch BEQZ(MIPSReg rs, std::function<void ()> delaySlot = nullptr) {
|
|
return BEQ(rs, R_ZERO, delaySlot);
|
|
}
|
|
inline void BEQZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot = nullptr) {
|
|
return BEQ(rs, R_ZERO, func, delaySlot);
|
|
}
|
|
inline FixupBranch BNEZ(MIPSReg rs, std::function<void ()> delaySlot = nullptr) {
|
|
return BNE(rs, R_ZERO, delaySlot);
|
|
}
|
|
inline void BNEZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot = nullptr) {
|
|
return BNE(rs, R_ZERO, func, delaySlot);
|
|
}
|
|
FixupBranch BLEZ(MIPSReg rs, std::function<void ()> delaySlot = nullptr);
|
|
void BLEZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot = nullptr);
|
|
FixupBranch BGTZ(MIPSReg rs, std::function<void ()> delaySlot = nullptr);
|
|
void BGTZ(MIPSReg rs, const void *func, std::function<void ()> delaySlot = nullptr);
|
|
|
|
void SetJumpTarget(const FixupBranch &branch);
|
|
bool BInRange(const void *func);
|
|
bool JInRange(const void *func);
|
|
|
|
// R_AT is the stereotypical scratch reg, but it is not likely to be used.
|
|
void QuickCallFunction(MIPSReg scratchreg, const void *func);
|
|
template <typename T> void QuickCallFunction(MIPSReg scratchreg, T func) {
|
|
QuickCallFunction(scratchreg, (const void *)func);
|
|
}
|
|
|
|
void LB(MIPSReg dest, MIPSReg base, s16 offset);
|
|
void LH(MIPSReg dest, MIPSReg base, s16 offset);
|
|
void LW(MIPSReg dest, MIPSReg base, s16 offset);
|
|
void SB(MIPSReg value, MIPSReg base, s16 offset);
|
|
void SH(MIPSReg dest, MIPSReg base, s16 offset);
|
|
void SW(MIPSReg value, MIPSReg base, s16 offset);
|
|
|
|
// These exist for the sole purpose of making compilation fail if you try to load/store from R+R.
|
|
void LB(MIPSReg dest, MIPSReg base, MIPSReg invalid);
|
|
void LH(MIPSReg dest, MIPSReg base, MIPSReg invalid);
|
|
void LW(MIPSReg dest, MIPSReg base, MIPSReg invalid);
|
|
void SB(MIPSReg value, MIPSReg base, MIPSReg invalid);
|
|
void SH(MIPSReg dest, MIPSReg base, MIPSReg invalid);
|
|
void SW(MIPSReg value, MIPSReg base, MIPSReg invalid);
|
|
|
|
void SLL(MIPSReg rd, MIPSReg rt, u8 sa);
|
|
void SRL(MIPSReg rd, MIPSReg rt, u8 sa);
|
|
void SRA(MIPSReg rd, MIPSReg rt, u8 sa);
|
|
void SLLV(MIPSReg rd, MIPSReg rt, MIPSReg rs);
|
|
void SRLV(MIPSReg rd, MIPSReg rt, MIPSReg rs);
|
|
void SRAV(MIPSReg rd, MIPSReg rt, MIPSReg rs);
|
|
|
|
void SLT(MIPSReg rd, MIPSReg rt, MIPSReg rs);
|
|
void SLTU(MIPSReg rd, MIPSReg rt, MIPSReg rs);
|
|
void SLTI(MIPSReg rd, MIPSReg rt, s16 imm);
|
|
// Note: very importantly, *sign* extends imm before an unsigned compare.
|
|
void SLTIU(MIPSReg rt, MIPSReg rs, s16 imm);
|
|
|
|
// ADD/SUB/ADDI intentionally omitted. They are just versions that trap.
|
|
void ADDU(MIPSReg rd, MIPSReg rs, MIPSReg rt);
|
|
void SUBU(MIPSReg rd, MIPSReg rs, MIPSReg rt);
|
|
void ADDIU(MIPSReg rt, MIPSReg rs, s16 imm);
|
|
void SUBIU(MIPSReg rt, MIPSReg rs, s16 imm) {
|
|
ADDIU(rt, rs, -imm);
|
|
}
|
|
|
|
void AND(MIPSReg rd, MIPSReg rs, MIPSReg rt);
|
|
void OR(MIPSReg rd, MIPSReg rs, MIPSReg rt);
|
|
void XOR(MIPSReg rd, MIPSReg rs, MIPSReg rt);
|
|
void ANDI(MIPSReg rt, MIPSReg rs, s16 imm);
|
|
void ORI(MIPSReg rt, MIPSReg rs, s16 imm);
|
|
void XORI(MIPSReg rt, MIPSReg rs, s16 imm);
|
|
|
|
// Clears the lower bits. On MIPS64, the result is sign extended.
|
|
void LUI(MIPSReg rt, s16 imm);
|
|
|
|
void INS(MIPSReg rt, MIPSReg rs, s8 pos, s8 size);
|
|
void EXT(MIPSReg rt, MIPSReg rs, s8 pos, s8 size);
|
|
|
|
// MIPS64 only. Transparently uses DSLL32 to shift 32-63 bits.
|
|
void DSLL(MIPSReg rd, MIPSReg rt, u8 sa);
|
|
|
|
void MOVI2R(MIPSReg reg, u64 val);
|
|
void MOVI2R(MIPSReg reg, s64 val) {
|
|
MOVI2R(reg, (u64)val);
|
|
}
|
|
void MOVI2R(MIPSReg reg, u32 val);
|
|
void MOVI2R(MIPSReg reg, s32 val) {
|
|
MOVI2R(reg, (u32)val);
|
|
}
|
|
template <class T> void MOVP2R(MIPSReg reg, T *val) {
|
|
if (sizeof(uintptr_t) > sizeof(u32)) {
|
|
MOVI2R(reg, (u64)(intptr_t)(const void *)val);
|
|
} else {
|
|
MOVI2R(reg, (u32)(intptr_t)(const void *)val);
|
|
}
|
|
}
|
|
|
|
protected:
|
|
inline void Write32(u32 value) {
|
|
*code32_++ = value;
|
|
}
|
|
|
|
// Less parenthesis.
|
|
inline void Write32Fields(u8 pos1, u32 v1) {
|
|
*code32_++ = (v1 << pos1);
|
|
}
|
|
inline void Write32Fields(u8 pos1, u32 v1, u8 pos2, u32 v2) {
|
|
*code32_++ = (v1 << pos1) | (v2 << pos2);
|
|
}
|
|
inline void Write32Fields(u8 pos1, u32 v1, u8 pos2, u32 v2, u8 pos3, u32 v3) {
|
|
*code32_++ = (v1 << pos1) | (v2 << pos2) | (v3 << pos3);
|
|
}
|
|
inline void Write32Fields(u8 pos1, u32 v1, u8 pos2, u32 v2, u8 pos3, u32 v3, u8 pos4, u32 v4) {
|
|
*code32_++ = (v1 << pos1) | (v2 << pos2) | (v3 << pos3) | (v4 << pos4);
|
|
}
|
|
inline void Write32Fields(u8 pos1, u32 v1, u8 pos2, u32 v2, u8 pos3, u32 v3, u8 pos4, u32 v4, u8 pos5, u32 v5) {
|
|
*code32_++ = (v1 << pos1) | (v2 << pos2) | (v3 << pos3) | (v4 << pos5) | (v5 << pos5);
|
|
}
|
|
inline void Write32Fields(u8 pos1, u32 v1, u8 pos2, u32 v2, u8 pos3, u32 v3, u8 pos4, u32 v4, u8 pos5, u32 v5, u8 pos6, u32 v6) {
|
|
*code32_++ = (v1 << pos1) | (v2 << pos2) | (v3 << pos3) | (v4 << pos5) | (v5 << pos5) | (v6 << pos6);
|
|
}
|
|
|
|
static void SetJumpTarget(const FixupBranch &branch, const void *dst);
|
|
static bool BInRange(const void *src, const void *dst);
|
|
static bool JInRange(const void *src, const void *dst);
|
|
FixupBranch MakeFixupBranch(FixupBranchType type);
|
|
void ApplyDelaySlot(std::function<void ()> delaySlot);
|
|
|
|
private:
|
|
union {
|
|
u8 *code_;
|
|
u32 *code32_;
|
|
};
|
|
u8 *lastCacheFlushEnd_;
|
|
};
|
|
|
|
// Everything that needs to generate machine code should inherit from this.
|
|
// You get memory management for free, plus, you can use all the LUI etc functions without
|
|
// having to prefix them with gen-> or something similar.
|
|
class MIPSCodeBlock : public MIPSEmitter {
|
|
public:
|
|
MIPSCodeBlock() : region(nullptr), region_size(0) {
|
|
}
|
|
virtual ~MIPSCodeBlock() {
|
|
if (region) {
|
|
FreeCodeSpace();
|
|
}
|
|
}
|
|
|
|
// Call this before you generate any code.
|
|
void AllocCodeSpace(int size);
|
|
|
|
// Always clear code space with breakpoints, so that if someone accidentally executes
|
|
// uninitialized, it just breaks into the debugger.
|
|
void ClearCodeSpace();
|
|
|
|
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
|
|
void FreeCodeSpace();
|
|
|
|
bool IsInSpace(const u8 *ptr) const {
|
|
return ptr >= region && ptr < region + region_size;
|
|
}
|
|
|
|
// Can possibly be undone. Will write protect the entire code region.
|
|
// Start over if you need to change the code, though (call FreeCodeSpace(), AllocCodeSpace().)
|
|
void WriteProtect();
|
|
void UnWriteProtect();
|
|
|
|
void ResetCodePtr() {
|
|
SetCodePtr(region);
|
|
}
|
|
|
|
size_t GetSpaceLeft() const {
|
|
return region_size - (GetCodePtr() - region);
|
|
}
|
|
|
|
u8 *GetBasePtr() {
|
|
return region;
|
|
}
|
|
|
|
size_t GetOffset(const u8 *ptr) const {
|
|
return ptr - region;
|
|
}
|
|
|
|
protected:
|
|
u8 *region;
|
|
size_t region_size;
|
|
};
|
|
|
|
};
|
|
|
|
#endif
|