Update the (currently unused) ARM64 emitter from Dolphin's current one (by HdkR).

This commit is contained in:
Henrik Rydgard 2015-03-06 00:42:08 +01:00
parent c991dd7555
commit c3f38f31f9
2 changed files with 1501 additions and 96 deletions

File diff suppressed because it is too large Load Diff

View File

@ -4,15 +4,18 @@
#pragma once
#include <functional>
#include "ArmCommon.h"
#include "BitSet.h"
#include "CodeBlock.h"
#include "Common.h"
#define DYNA_REC JIT
namespace Arm64Gen
{
#define DYNA_REC JIT
// X30 serves a dual purpose as a link register
// Encoded as <u3:type><u5:reg>
// Types:
@ -76,11 +79,15 @@ enum ARM64Reg
INVALID_REG = 0xFFFFFFFF
};
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool Is128Bit(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
enum OpType
{
@ -216,10 +223,24 @@ private:
u32 m_shift;
public:
ArithOption(ARM64Reg Rd)
ArithOption(ARM64Reg Rd, bool index = false)
{
// Indexed registers are a certain feature of AARch64
// On Loadstore instructions that use a register offset
// We can have the register as an index
// If we are indexing then the offset register will
// be shifted to the left so we are indexing at intervals
// of the size of what we are loading
// 8-bit: Index does nothing
// 16-bit: Index LSL 1
// 32-bit: Index LSL 2
// 64-bit: Index LSL 3
if (index)
m_shift = 4;
else
m_shift = 0;
m_destReg = Rd;
m_shift = 0;
m_type = TYPE_EXTENDEDREG;
if (Is64Bit(Rd))
{
@ -239,26 +260,36 @@ public:
m_shifttype = shift_type;
m_type = TYPE_SHIFTEDREG;
if (Is64Bit(Rd))
{
m_width = WIDTH_64BIT;
if (shift == 64)
m_shift = 0;
}
else
{
m_width = WIDTH_32BIT;
if (shift == 32)
m_shift = 0;
}
}
TypeSpecifier GetType() const
{
return m_type;
}
ARM64Reg GetReg()
{
return m_destReg;
}
u32 GetData() const
{
switch (m_type)
{
case TYPE_EXTENDEDREG:
return (m_width == WIDTH_64BIT ? (1 << 31) : 0) |
(m_extend << 13) |
return (m_extend << 13) |
(m_shift << 10);
break;
case TYPE_SHIFTEDREG:
return (m_width == WIDTH_64BIT ? (1 << 31) : 0) |
(m_shifttype << 22) |
return (m_shifttype << 22) |
(m_shift << 10);
break;
default:
@ -271,6 +302,8 @@ public:
class ARM64XEmitter
{
friend class ARM64FloatEmitter;
private:
u8* m_code;
u8* m_startcode;
@ -295,14 +328,15 @@ private:
void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
protected:
inline void Write32(u32 value)
@ -448,11 +482,14 @@ public:
void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Logical (shifted register)
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
@ -492,6 +529,8 @@ public:
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
// Load Register (Literal)
void LDR(ARM64Reg Rt, u32 imm);
@ -538,16 +577,27 @@ public:
void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Load/Store register (register offset)
void STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
void STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
// Load/Store register (unscaled offset)
void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Load/Store pair
void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
@ -563,7 +613,168 @@ public:
// ABI related
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
// Utility to generate a call to a std::function object.
//
// Unfortunately, calling operator() directly is undefined behavior in C++
// (this method might be a thunk in the case of multi-inheritance) so we
// have to go through a trampoline function.
template <typename T, typename... Args>
static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
Args... args)
{
(*f)(args...);
}
// This function expects you to have set up the state.
// Overwrites X0 and X30
template <typename T, typename... Args>
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
{
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
MOVI2R(X30, (u64)trampoline);
MOVI2R(X0, (u64)const_cast<void*>((const void*)f));
return X30;
}
};
class ARM64FloatEmitter
{
public:
ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}
void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Loadstore unscaled
void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
// Loadstore single structure
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
// Loadstore multiple structure
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
// Scalar - 1 Source
void FABS(ARM64Reg Rd, ARM64Reg Rn);
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
// Scalar - 2 Source
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Scalar floating point immediate
void FMOV(ARM64Reg Rd, u32 imm);
// Vector
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void NOT(ARM64Reg Rd, ARM64Reg Rn);
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
// Move
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
// One source
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
// Conversion between float and integer
void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
// Float comparison
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
void FCMP(ARM64Reg Rn);
void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
void FCMPE(ARM64Reg Rn);
void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
// Conditional select
void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
// Permute
void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Shift by immediate
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
// vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
// ABI related
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
private:
ARM64XEmitter* m_emit;
inline void Write32(u32 value) { m_emit->Write32(value); }
// Emitting functions
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
};
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
@ -572,7 +783,7 @@ private:
void PoisonMemory() override
{
u32* ptr = (u32*)region;
u32* maxptr = (u32*)region + region_size;
u32* maxptr = (u32*)(region + region_size);
// If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything
// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
// AArch64: 0xD4200000 = BRK 0