Merge pull request #7839 from unknownbrackets/arm64-micro

More ARM64 jit optimizations (multiply mainly)
Henrik Rydgård 2015-07-03 10:38:35 +02:00
commit 7dbe2821dc
18 changed files with 115 additions and 88 deletions

View File

@ -670,13 +670,17 @@ void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn,
bool b64Bit = Is64Bit(Rt);
bool bVec = IsVector(Rt);
u8 shift = 0;
if (size == 64)
imm >>= 3;
shift = 3;
else if (size == 32)
imm >>= 2;
shift = 2;
else if (size == 16)
imm >>= 1;
shift = 1;
_assert_msg_(DYNA_REC, ((imm >> shift) << shift) == imm, "%s(INDEX_UNSIGNED): offset must be aligned %d", __FUNCTION__, imm);
imm >>= shift;
_assert_msg_(DYNA_REC, imm >= 0, "%s(INDEX_UNSIGNED): offset must be positive %d", __FUNCTION__, imm);
_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);
@ -1354,6 +1358,10 @@ void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
UMADDL(Rd, Rn, Rm, SP);
}
void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
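
Note on the two emitter changes above, with a standalone C++ sketch (hypothetical helper name, not the emitter itself): INDEX_UNSIGNED loads and stores scale the byte offset by the access size before encoding it into the 12-bit immediate field, and the refactor keeps the shift around so misaligned or out-of-range offsets are rejected up front rather than silently truncated. UMULL, meanwhile, is emitted as UMADDL with register 31 in the accumulator slot, which reads as the zero register in that operand position, so the result is just the widening product.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the INDEX_UNSIGNED offset handling; returns false for
// offsets the real code rejects with _assert_msg_.
static bool EncodeUnsignedOffset(int sizeInBits, int64_t imm, uint32_t *field) {
	uint8_t shift = 0;
	if (sizeInBits == 64)
		shift = 3;
	else if (sizeInBits == 32)
		shift = 2;
	else if (sizeInBits == 16)
		shift = 1;
	if (((imm >> shift) << shift) != imm)
		return false;  // offset must be aligned to the access size
	imm >>= shift;
	if (imm < 0 || (imm & ~0xFFFLL))
		return false;  // scaled offset must fit the 12-bit unsigned field
	*field = (uint32_t)imm;  // this value goes into the instruction's immediate bits
	return true;
}

int main() {
	uint32_t field;
	printf("%d\n", EncodeUnsignedOffset(32, 8, &field));  // 1: encodes as 2
	printf("%d\n", EncodeUnsignedOffset(32, 6, &field));  // 0: not 4-byte aligned
	return 0;
}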

View File

@ -532,6 +532,7 @@ public:
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

View File

@ -39,7 +39,7 @@ using namespace MIPSAnalyst;
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }

View File

@ -40,7 +40,7 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }

View File

@ -37,9 +37,9 @@
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define NEON_IF_AVAILABLE(func) { if (jo.useNEONVFPU) { func(op); return; } }
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)

View File

@ -47,8 +47,11 @@
#include "Core/MIPS/ARM/ArmCompVFPUNEONUtil.h"
// TODO: Somehow #ifdef away on ARMv5eabi, without breaking the linker.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define DISABLE_UNKNOWN_PREFIX { WLOG("DISABLE: Unknown Prefix in %s", __FUNCTION__); fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }

View File

@ -44,11 +44,6 @@
#include "Core/MIPS/ARM/ArmCompVFPUNEONUtil.h"
// TODO: Somehow #ifdef away on ARMv5eabi, without breaking the linker.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)

View File

@ -184,6 +184,7 @@ void ArmRegCache::MapRegTo(ARMReg reg, MIPSGPReg mipsReg, int mapFlags) {
} else {
if (mipsReg == MIPS_REG_ZERO) {
// This way, if we SetImm() it, we'll keep it.
// TODO: Actually, this may cause trouble with SetRegImm? The reg is NOT zero yet.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {

View File

@ -74,7 +74,7 @@ struct RegARM {
struct RegMIPS {
// Where is this MIPS register?
ArmJitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
// Data (both or only one may be used, depending on loc.)
u32 imm;
ArmGen::ARMReg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.

View File

@ -40,12 +40,11 @@ using namespace MIPSAnalyst;
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
namespace MIPSComp {
using namespace Arm64Gen;
using namespace Arm64JitConstants;
@ -445,7 +444,7 @@ void Arm64Jit::Comp_Special3(MIPSOpcode op) {
return;
}
// It might be nice to avoid flushing rs, but it's the a little slower and
// It might be nice to avoid flushing rs, but it's a little slower and
// usually more instructions. Not worth it.
gpr.MapDirtyIn(rt, rs, false);
BFI(gpr.R(rt), gpr.R(rs), pos, size - pos);
@ -545,28 +544,33 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
// Note that in all cases below, LO is actually mapped to HI:LO.
// That is, the host reg is 64 bits and has HI at the top.
// HI is not mappable.
switch (op & 63) {
case 16: // R(rd) = HI; //mfhi
if (gpr.IsImm(MIPS_REG_HI)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_HI));
// LO and HI are in the same reg.
if (gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) >> 32);
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_HI);
MOV(gpr.R(rd), gpr.R(MIPS_REG_HI));
gpr.MapDirtyIn(rd, MIPS_REG_LO);
UBFX(EncodeRegTo64(gpr.R(rd)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
break;
case 17: // HI = R(rs); //mthi
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_HI, gpr.GetImm(rs));
if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(MIPS_REG_HI, (gpr.GetImm(rs) << 32) | (gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL));
break;
}
gpr.MapDirtyIn(MIPS_REG_HI, rs);
MOV(gpr.R(MIPS_REG_HI), gpr.R(rs));
gpr.MapDirtyIn(MIPS_REG_LO, rs, false);
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 32, 32);
break;
case 18: // R(rd) = LO; break; //mflo
if (gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO));
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL);
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_LO);
@ -574,45 +578,42 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
break;
case 19: // LO = R(rs); break; //mtlo
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_LO, gpr.GetImm(rs));
if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(MIPS_REG_HI, gpr.GetImm(rs) | (gpr.GetImm(MIPS_REG_LO) & ~0xFFFFFFFFULL));
break;
}
gpr.MapDirtyIn(MIPS_REG_LO, rs);
MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));
gpr.MapDirtyIn(MIPS_REG_LO, rs, false);
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 0, 32);
break;
// TODO: All of these could be more elegant if we cached HI and LO together in one 64-bit register!
case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
s64 result = (s64)(s32)gpr.GetImm(rs) * (s64)(s32)gpr.GetImm(rt);
u64 resultBits = (u64)result;
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
gpr.SetImm(MIPS_REG_LO, (u64)result);
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
SMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));
LSR(EncodeRegTo64(gpr.R(MIPS_REG_HI)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32);
break;
case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
u64 resultBits = (u64)gpr.GetImm(rs) * (u64)gpr.GetImm(rt);
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
gpr.SetImm(MIPS_REG_LO, resultBits);
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
MUL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), EncodeRegTo64(gpr.R(rt)));
LSR(EncodeRegTo64(gpr.R(MIPS_REG_HI)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
// In case of pointerification, let's use UMULL.
UMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));
break;
case 26: //div
// TODO: Does this handle INT_MAX, 0, etc. correctly?
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
UBFX(SCRATCH1_64, EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
SDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));
MSUB(gpr.R(MIPS_REG_HI), gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
break;
case 27: //divu
@ -622,11 +623,10 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
if (denominator == 0) {
// TODO: Is this correct?
gpr.SetImm(MIPS_REG_LO, 0);
gpr.SetImm(MIPS_REG_HI, 0);
} else {
gpr.MapDirtyDirtyIn(MIPS_REG_LO, MIPS_REG_HI, rs);
gpr.MapDirtyIn(MIPS_REG_LO, rs);
// Remainder is just an AND, neat.
ANDI2R(gpr.R(MIPS_REG_HI), gpr.R(rs), denominator - 1, SCRATCH1);
ANDI2R(SCRATCH1, gpr.R(rs), denominator - 1, SCRATCH1);
int shift = 0;
while (denominator != 0) {
++shift;
@ -638,56 +638,47 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
} else {
MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));
}
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
}
} else {
// TODO: Does this handle INT_MAX, 0, etc. correctly?
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
UBFX(SCRATCH1_64, EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
UDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));
MSUB(gpr.R(MIPS_REG_HI), gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
}
break;
case 28: //madd
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
SMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 29: //maddu
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
UMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 46: // msub
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
SMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 47: // msubu
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
UMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
break;
}
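
The core of this file's change is the comment near the top of Comp_MulDivType: LO is now kept in a 64-bit host register with HI in its upper half, and HI itself is never mapped. mfhi/mthi become UBFX/BFI on that register, mflo/mtlo touch only the low half, mult writes the whole 64-bit product with SMULL, multu switches from a full 64-bit MUL to UMULL so that (per the in-code comment) pointerified upper bits in the source registers cannot affect the product, the divides stash the MSUB remainder in the top half with BFI, and the madd/msub family accumulates directly into the packed register with SMADDL/UMADDL/SMSUBL/UMSUBL, dropping the ORR that used to rebuild HI:LO. Below is a minimal plain-C++ model of that packing, with made-up function names; it mirrors the bit manipulation, not the JIT itself.

#include <cstdint>
#include <cstdio>

static uint64_t hilo;  // models the host register: HI in bits 32..63, LO in bits 0..31

static uint32_t Mfhi() { return (uint32_t)(hilo >> 32); }  // UBFX rd, hilo, 32, 32
static uint32_t Mflo() { return (uint32_t)hilo; }          // plain 32-bit MOV
static void Mthi(uint32_t v) { hilo = (hilo & 0xFFFFFFFFULL) | ((uint64_t)v << 32); }  // BFI hilo, v, 32, 32
static void Mtlo(uint32_t v) { hilo = (hilo & ~0xFFFFFFFFULL) | v; }                   // BFI hilo, v, 0, 32

static void Mult(int32_t rs, int32_t rt) {
	// SMULL writes the full signed 64-bit product, filling HI and LO in one go.
	hilo = (uint64_t)((int64_t)rs * (int64_t)rt);
}

static void Div(int32_t rs, int32_t rt) {
	if (rt == 0 || (rs == INT32_MIN && rt == -1))
		return;  // edge cases MIPS defines differently; the jit's handling is the TODO in the diff
	int32_t q = rs / rt;      // SDIV
	int32_t r = rs - q * rt;  // MSUB into a scratch register
	hilo = ((uint64_t)(uint32_t)r << 32) | (uint32_t)q;  // BFI the remainder into the HI half
}

int main() {
	Mult(-3, 100000000);  // product needs both halves
	printf("mult: hi=%08x lo=%08x\n", Mfhi(), Mflo());
	Div(7, 2);
	printf("div:  hi=%08x lo=%08x\n", Mfhi(), Mflo());
	Mthi(0x1234); Mtlo(0x5678);
	printf("mt:   hi=%08x lo=%08x\n", Mfhi(), Mflo());
	return 0;
}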

View File

@ -48,7 +48,7 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
@ -334,10 +334,7 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)
} else {
gpr.MapDirtyIn(rt, MIPS_REG_FPCOND);
LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));
// BFI(gpr.R(rt), gpr.R(MIPS_REG_FPCOND), 23, 1);
ANDI2R(SCRATCH1, gpr.R(MIPS_REG_FPCOND), 1); // Just in case
ANDI2R(gpr.R(rt), gpr.R(rt), ~(0x1 << 23), SCRATCH2); // SCRATCHREG2 won't be used, this turns into a simple BIC.
ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 23));
BFI(gpr.R(rt), gpr.R(MIPS_REG_FPCOND), 23, 1);
}
} else if (fs == 0) {
gpr.SetImm(rt, MIPSState::FCR0_VALUE);
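
The cfc1 path above replaces the three-instruction AND/BIC/ORR dance with the single BFI that was previously commented out: BFI reads only bit 0 of the source, so the "just in case" masking of FPCOND is unnecessary. A small sketch of the bit operation (illustrative helper, plain C++):

#include <cstdint>
#include <cstdio>

// What "BFI rt, fpcond, 23, 1" computes: copy fpcond<0> into rt<23>, leaving all other bits alone.
static uint32_t InsertFpCond(uint32_t fcr31, uint32_t fpcond) {
	return (fcr31 & ~(1u << 23)) | ((fpcond & 1u) << 23);
}

int main() {
	printf("%08x\n", InsertFpCond(0x00000E80, 1));  // 00800e80
	printf("%08x\n", InsertFpCond(0x00800E80, 0));  // 00000e80
	return 0;
}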

View File

@ -64,8 +64,7 @@
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
namespace MIPSComp {
using namespace Arm64Gen;
using namespace Arm64JitConstants;

View File

@ -35,9 +35,9 @@
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
@ -50,8 +50,7 @@
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
namespace MIPSComp
{
namespace MIPSComp {
using namespace Arm64Gen;
using namespace Arm64JitConstants;

View File

@ -69,6 +69,13 @@ bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) {
return mr[mipsReg].loc == ML_ARMREG;
}
bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) {
if (IsMapped(mipsReg)) {
return ar[mr[mipsReg].reg].pointerified;
}
return false;
}
void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) {
// On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast.
emit_->MOVI2R(reg, imm);
@ -90,8 +97,12 @@ void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
case ML_MEM:
{
int offset = GetMipsRegOffset(mipsReg);
ARM64Reg loadReg = reg;
// INFO_LOG(JIT, "MapRegTo %d mips: %d offset %d", (int)reg, mipsReg, offset);
emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, offset);
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, offset);
mr[mipsReg].loc = ML_ARMREG;
break;
}
@ -114,6 +125,7 @@ void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
} else {
if (mipsReg == MIPS_REG_ZERO) {
// This way, if we SetImm() it, we'll keep it.
// TODO: Actually, this may cause trouble with SetRegImm? The reg is NOT zero.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {
@ -157,6 +169,10 @@ ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
if (mipsReg == MIPS_REG_HI) {
ERROR_LOG_REPORT(JIT, "Cannot map HI in Arm64RegCache");
return INVALID_REG;
}
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
@ -172,7 +188,8 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
// If reg is written to, pointerification is lost.
ar[armReg].pointerified = false;
}
return (ARM64Reg)mr[mipsReg].reg;
return mr[mipsReg].reg;
}
// Okay, not mapped, so we need to allocate an ARM register.
@ -298,8 +315,11 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) {
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
ARM64Reg storeReg = r;
if (ar[r].mipsReg == MIPS_REG_LO)
storeReg = EncodeRegTo64(storeReg);
if (ar[r].isDirty && mreg.loc == ML_ARMREG)
emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
@ -327,7 +347,10 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
if (r != MIPS_REG_ZERO) {
if (r == MIPS_REG_LO) {
SetRegImm(SCRATCH1_64, mr[r].imm);
emit_->STR(INDEX_UNSIGNED, SCRATCH1_64, CTXREG, GetMipsRegOffset(r));
} else if (r != MIPS_REG_ZERO) {
SetRegImm(SCRATCH1, mr[r].imm);
emit_->STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, GetMipsRegOffset(r));
}
@ -340,7 +363,10 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
}
if (ar[mr[r].reg].isDirty) {
if (r != MIPS_REG_ZERO) {
emit_->STR(INDEX_UNSIGNED, mr[r].reg, CTXREG, GetMipsRegOffset(r));
ARM64Reg storeReg = mr[r].reg;
if (r == MIPS_REG_LO)
storeReg = EncodeRegTo64(storeReg);
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(r));
}
ar[mr[r].reg].isDirty = false;
}
@ -375,7 +401,7 @@ void Arm64RegCache::FlushAll() {
}
}
void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) {
void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) {
if (r == MIPS_REG_ZERO && immVal != 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
@ -398,7 +424,7 @@ bool Arm64RegCache::IsImm(MIPSGPReg r) const {
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
}
u32 Arm64RegCache::GetImm(MIPSGPReg r) const {
u64 Arm64RegCache::GetImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return 0;
if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) {
ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r);
@ -443,7 +469,7 @@ void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) {
ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
return (ARM64Reg)mr[mipsReg].reg;
return mr[mipsReg].reg;
} else {
ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_);
return INVALID_REG; // BAAAD
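
The regcache changes above all follow from the packing: HI can no longer be mapped (MapReg now reports an error for it), and whenever LO moves between a host register and the context struct it goes through the 64-bit view (EncodeRegTo64), so a single LDR/STR moves HI and LO together. That relies on the two 32-bit words being adjacent in the context struct, with HI landing in the upper half on a little-endian target. A rough model of the flush path, using an invented mini context struct rather than the real MIPSState:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Invented mini context; in the real code this is MIPSState and the offset comes
// from GetMipsRegOffset(MIPS_REG_LO).
struct MiniCtx {
	uint32_t lo;
	uint32_t hi;
};

// Models "STR(INDEX_UNSIGNED, EncodeRegTo64(reg), CTXREG, offset)": one 8-byte store
// at lo's offset writes both halves (little-endian assumed).
static void FlushLo(MiniCtx *ctx, uint64_t hostLo64) {
	std::memcpy(reinterpret_cast<char *>(ctx) + offsetof(MiniCtx, lo), &hostLo64, sizeof(hostLo64));
}

int main() {
	MiniCtx ctx = {0, 0};
	FlushLo(&ctx, 0xDEADBEEF00001234ULL);
	printf("lo=%08x hi=%08x\n", ctx.lo, ctx.hi);  // lo=00001234 hi=deadbeef
	return 0;
}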

View File

@ -77,8 +77,8 @@ struct RegARM {
struct RegMIPS {
// Where is this MIPS register?
Arm64JitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
// Data (both or only one may be used, depending on loc.)
u64 imm;
Arm64Gen::ARM64Reg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.
@ -103,9 +103,9 @@ public:
void ReleaseSpillLock(MIPSGPReg reg);
void ReleaseSpillLocks();
void SetImm(MIPSGPReg reg, u32 immVal);
void SetImm(MIPSGPReg reg, u64 immVal);
bool IsImm(MIPSGPReg reg) const;
u32 GetImm(MIPSGPReg reg) const;
u64 GetImm(MIPSGPReg reg) const;
// Optimally set a register to an imm value (possibly using another register.)
void SetRegImm(Arm64Gen::ARM64Reg reg, u64 imm);
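
The header side mirrors this: the cached immediate widens from u32 to u64 because a constant LO now carries HI in its top half, and the comment change reflects that in the ML_ARMREG_IMM state both fields are live at once. An illustrative mirror of the cache entry (simplified names, not the real header):

#include <cstdint>

enum class Loc { ML_IMM, ML_ARMREG, ML_ARMREG_IMM, ML_MEM };

struct MiniRegMIPS {
	Loc loc = Loc::ML_MEM;
	uint64_t imm = 0;  // valid in ML_IMM and ML_ARMREG_IMM; 64-bit so LO can hold HI:LO
	int reg = -1;      // valid in ML_ARMREG and ML_ARMREG_IMM
};

int main() {
	MiniRegMIPS lo;
	lo.loc = Loc::ML_ARMREG_IMM;      // value is in a host register *and* still known as a constant
	lo.reg = 0;
	lo.imm = 0x0000000100000003ULL;   // HI = 1, LO = 3
	return 0;
}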

View File

@ -170,6 +170,9 @@ public:
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
u32 vfpuCtrl[16];
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
u32 padLoHi;
union {
struct {
u32 pc;
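
padLoHi exists purely for alignment: the 64-bit loads and stores of the lo/hi pair in the ARM64 regcache want lo to start on an 8-byte boundary from the base of the struct, with hi packed directly after it. A compile-time check of that property on an illustrative struct (field names and ordering are made up; only the alignment relationship is the point):

#include <cstddef>
#include <cstdint>

struct MiniMipsState {
	uint32_t vfpuCtrl[16];
	uint32_t padLoHi;  // padding so the pair below lands on an 8-byte boundary
	uint32_t pc;
	uint32_t lo;
	uint32_t hi;
};

static_assert(offsetof(MiniMipsState, lo) % 8 == 0, "lo must be 8-byte aligned for the 64-bit LDR/STR");
static_assert(offsetof(MiniMipsState, hi) == offsetof(MiniMipsState, lo) + 4, "hi must directly follow lo");

int main() { return 0; }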

View File

@ -37,7 +37,7 @@ using namespace MIPSAnalyst;
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }

View File

@ -437,7 +437,11 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr
// The rest are 64-bit accumulator, 32-bit operands
char sign = (op31 >> 2) ? 'u' : 's';
int opn = (op31 & 0x3) << 1 | o0;
snprintf(instr->text, sizeof(instr->text), "%c%s x%d, x%d, w%d, w%d", sign, opnames[opn], Rd, Rn, Rm, Ra);
if (opn < 4 && Ra == 31) {
snprintf(instr->text, sizeof(instr->text), "%cmull x%d, w%d, w%d", sign, Rd, Rn, Rm);
} else {
snprintf(instr->text, sizeof(instr->text), "%c%s x%d, w%d, w%d, x%d", sign, opnames[opn], Rd, Rn, Rm, Ra);
}
}
} else {
// Logical (extended register)
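
Finally, the disassembler learns the standard alias: in the multiply-add-long group, an accumulator field of 31 names the zero register, so smaddl/umaddl with xzr is shown as smull/umull (the operand widths are also corrected to x, w, w, x). A simplified version of that formatting decision (hypothetical helper, covering only the maddl case):

#include <cstdio>

static void PrintMulAddLong(bool isUnsigned, int Rd, int Rn, int Rm, int Ra) {
	char sign = isUnsigned ? 'u' : 's';
	if (Ra == 31) {
		// Accumulating into the zero register is just a widening multiply.
		printf("%cmull x%d, w%d, w%d\n", sign, Rd, Rn, Rm);
	} else {
		printf("%cmaddl x%d, w%d, w%d, x%d\n", sign, Rd, Rn, Rm, Ra);
	}
}

int main() {
	PrintMulAddLong(true, 2, 0, 1, 31);  // umull x2, w0, w1
	PrintMulAddLong(true, 2, 0, 1, 3);   // umaddl x2, w0, w1, x3
	return 0;
}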