arm64: Meld LO and HI together for multiplies.

This commit is contained in:
Author: Unknown W. Brackets — 2015-07-02 20:23:27 -07:00
Parent commit: 3780b6c0a5
Commit: fed687fb59
5 changed files with 73 additions and 60 deletions

View File

@ -74,7 +74,7 @@ struct RegARM {
struct RegMIPS {
// Where is this MIPS register?
ArmJitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
// Data (both or only one may be used, depending on loc.)
u32 imm;
ArmGen::ARMReg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.

View File

@ -545,28 +545,33 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
// Note that in all cases below, LO is actually mapped to HI:LO.
// That is, the host reg is 64 bits and has HI at the top.
// HI is not mappable.
switch (op & 63) {
case 16: // R(rd) = HI; //mfhi
if (gpr.IsImm(MIPS_REG_HI)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_HI));
// LO and HI are in the same reg.
if (gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) >> 32);
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_HI);
MOV(gpr.R(rd), gpr.R(MIPS_REG_HI));
gpr.MapDirtyIn(rd, MIPS_REG_LO);
UBFX(EncodeRegTo64(gpr.R(rd)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
break;
case 17: // HI = R(rs); //mthi
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_HI, gpr.GetImm(rs));
if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(MIPS_REG_HI, (gpr.GetImm(rs) << 32) | (gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL));
break;
}
gpr.MapDirtyIn(MIPS_REG_HI, rs);
MOV(gpr.R(MIPS_REG_HI), gpr.R(rs));
gpr.MapDirtyIn(MIPS_REG_LO, rs, false);
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 32, 32);
break;
case 18: // R(rd) = LO; break; //mflo
if (gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO));
gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL);
break;
}
gpr.MapDirtyIn(rd, MIPS_REG_LO);
@ -574,45 +579,42 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
break;
case 19: // LO = R(rs); break; //mtlo
if (gpr.IsImm(rs)) {
gpr.SetImm(MIPS_REG_LO, gpr.GetImm(rs));
if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {
gpr.SetImm(MIPS_REG_HI, gpr.GetImm(rs) | (gpr.GetImm(MIPS_REG_LO) & ~0xFFFFFFFFULL));
break;
}
gpr.MapDirtyIn(MIPS_REG_LO, rs);
MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));
gpr.MapDirtyIn(MIPS_REG_LO, rs, false);
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 0, 32);
break;
// TODO: All of these could be more elegant if we cached HI and LO together in one 64-bit register!
case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
s64 result = (s64)(s32)gpr.GetImm(rs) * (s64)(s32)gpr.GetImm(rt);
u64 resultBits = (u64)result;
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
gpr.SetImm(MIPS_REG_LO, (u64)result);
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
SMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));
LSR(EncodeRegTo64(gpr.R(MIPS_REG_HI)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32);
break;
case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
u64 resultBits = (u64)gpr.GetImm(rs) * (u64)gpr.GetImm(rt);
gpr.SetImm(MIPS_REG_LO, (u32)(resultBits >> 0));
gpr.SetImm(MIPS_REG_HI, (u32)(resultBits >> 32));
gpr.SetImm(MIPS_REG_LO, resultBits);
break;
}
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
MUL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), EncodeRegTo64(gpr.R(rt)));
LSR(EncodeRegTo64(gpr.R(MIPS_REG_HI)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
// In case of pointerification, let's use UMULL.
UMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));
break;
case 26: //div
// TODO: Does this handle INT_MAX, 0, etc. correctly?
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
UBFX(SCRATCH1_64, EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
SDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));
MSUB(gpr.R(MIPS_REG_HI), gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
break;
case 27: //divu
@ -622,11 +624,10 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
if (denominator == 0) {
// TODO: Is this correct?
gpr.SetImm(MIPS_REG_LO, 0);
gpr.SetImm(MIPS_REG_HI, 0);
} else {
gpr.MapDirtyDirtyIn(MIPS_REG_LO, MIPS_REG_HI, rs);
gpr.MapDirtyIn(MIPS_REG_LO, rs);
// Remainder is just an AND, neat.
ANDI2R(gpr.R(MIPS_REG_HI), gpr.R(rs), denominator - 1, SCRATCH1);
ANDI2R(SCRATCH1, gpr.R(rs), denominator - 1, SCRATCH1);
int shift = 0;
while (denominator != 0) {
++shift;
@ -638,56 +639,47 @@ void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
} else {
MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));
}
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
}
} else {
// TODO: Does this handle INT_MAX, 0, etc. correctly?
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);
UBFX(SCRATCH1_64, EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);
UDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));
MSUB(gpr.R(MIPS_REG_HI), gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));
BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);
}
break;
case 28: //madd
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
SMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 29: //maddu
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
UMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 46: // msub
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
SMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
}
break;
case 47: // msubu
{
gpr.MapDirtyDirtyInIn(MIPS_REG_LO, MIPS_REG_HI, rs, rt, false);
gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);
ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));
ARM64Reg hi64 = EncodeRegTo64(gpr.R(MIPS_REG_HI));
ORR(lo64, lo64, hi64, ArithOption(lo64, ST_LSL, 32));
UMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!
LSR(hi64, lo64, 32);
break;
}

View File

@ -97,8 +97,12 @@ void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
case ML_MEM:
{
int offset = GetMipsRegOffset(mipsReg);
ARM64Reg loadReg = reg;
// INFO_LOG(JIT, "MapRegTo %d mips: %d offset %d", (int)reg, mipsReg, offset);
emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, offset);
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, offset);
mr[mipsReg].loc = ML_ARMREG;
break;
}
@ -164,6 +168,10 @@ ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
if (mipsReg == MIPS_REG_HI) {
ERROR_LOG_REPORT(JIT, "Cannot map HI in Arm64RegCache");
return INVALID_REG;
}
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
@ -179,7 +187,8 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
// If reg is written to, pointerification is lost.
ar[armReg].pointerified = false;
}
return (ARM64Reg)mr[mipsReg].reg;
return mr[mipsReg].reg;
}
// Okay, not mapped, so we need to allocate an ARM register.
@ -305,8 +314,11 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) {
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
ARM64Reg storeReg = r;
if (ar[r].mipsReg == MIPS_REG_LO)
storeReg = EncodeRegTo64(storeReg);
if (ar[r].isDirty && mreg.loc == ML_ARMREG)
emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
@ -334,7 +346,10 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
if (r != MIPS_REG_ZERO) {
if (r == MIPS_REG_LO) {
SetRegImm(SCRATCH1_64, mr[r].imm);
emit_->STR(INDEX_UNSIGNED, SCRATCH1_64, CTXREG, GetMipsRegOffset(r));
} else if (r != MIPS_REG_ZERO) {
SetRegImm(SCRATCH1, mr[r].imm);
emit_->STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, GetMipsRegOffset(r));
}
@ -347,7 +362,10 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
}
if (ar[mr[r].reg].isDirty) {
if (r != MIPS_REG_ZERO) {
emit_->STR(INDEX_UNSIGNED, mr[r].reg, CTXREG, GetMipsRegOffset(r));
ARM64Reg storeReg = mr[r].reg;
if (r == MIPS_REG_LO)
storeReg = EncodeRegTo64(storeReg);
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(r));
}
ar[mr[r].reg].isDirty = false;
}
@ -382,7 +400,7 @@ void Arm64RegCache::FlushAll() {
}
}
void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) {
void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) {
if (r == MIPS_REG_ZERO && immVal != 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
@ -405,7 +423,7 @@ bool Arm64RegCache::IsImm(MIPSGPReg r) const {
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
}
u32 Arm64RegCache::GetImm(MIPSGPReg r) const {
u64 Arm64RegCache::GetImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return 0;
if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) {
ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r);
@ -450,7 +468,7 @@ void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) {
ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
return (ARM64Reg)mr[mipsReg].reg;
return mr[mipsReg].reg;
} else {
ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_);
return INVALID_REG; // BAAAD

View File

@ -77,8 +77,8 @@ struct RegARM {
struct RegMIPS {
// Where is this MIPS register?
Arm64JitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
// Data (both or only one may be used, depending on loc.)
u64 imm;
Arm64Gen::ARM64Reg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.
@ -103,9 +103,9 @@ public:
void ReleaseSpillLock(MIPSGPReg reg);
void ReleaseSpillLocks();
void SetImm(MIPSGPReg reg, u32 immVal);
void SetImm(MIPSGPReg reg, u64 immVal);
bool IsImm(MIPSGPReg reg) const;
u32 GetImm(MIPSGPReg reg) const;
u64 GetImm(MIPSGPReg reg) const;
// Optimally set a register to an imm value (possibly using another register.)
void SetRegImm(Arm64Gen::ARM64Reg reg, u64 imm);

View File

@ -170,6 +170,9 @@ public:
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
u32 vfpuCtrl[16];
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
u32 padLoHi;
union {
struct {
u32 pc;