ARM64: Turn off some debugging stuff, start implementing instructions. Something is wrong though, if I implement more than addiu things break..

This commit is contained in:
Henrik Rydgard 2015-03-15 23:38:21 +01:00
parent 742f48ad56
commit 8dc77ff32e
6 changed files with 300 additions and 48 deletions

View File

@ -19,6 +19,8 @@
#include <vector>
#include <cstdio>
#include "base/logging.h"
#include "Common/MsgHandler.h"
#include "Common/StdMutex.h"
#include "Common/Atomics.h"

View File

@ -20,6 +20,8 @@
#include <queue>
#include <algorithm>
#include "base/logging.h"
#include "Common/LogManager.h"
#include "Common/CommonTypes.h"
#include "Core/HLE/HLE.h"

View File

@ -32,7 +32,7 @@ using namespace Arm64Gen;
//static int temp32; // unused?
static const bool enableDebug = true;
static const bool enableDebug = false;
//static bool enableStatistics = false; //unused?
@ -77,10 +77,10 @@ static const bool enableDebug = true;
extern volatile CoreState coreState;
void ShowPC(u32 sp) {
void ShowPC(u32 sp, void *membase, void *jitbase) {
static int count = 0;
if (currentMIPS) {
ELOG("ShowPC : %08x ArmSP : %08x %d", currentMIPS->pc, sp, count);
ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, sp, count);
} else {
ELOG("Universe corrupt?");
}
@ -127,7 +127,7 @@ void Arm64Jit::GenerateFixedCode() {
outerLoopPCInSCRATCH1 = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop = GetCodePtr();
SaveDowncount();
SaveDowncount(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
ApplyRoundingMode(true);
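
A minimal sketch (not PPSSPP code) of why the comment above insists on the save/restore: while JIT code runs, the downcount lives in a host register (DOWNCOUNTREG), but CoreTiming::Advance only sees the copy in MIPSState, so the register must be spilled before the call and reloaded afterwards. All names below are illustrative stand-ins.

#include <cstdio>

struct FakeMipsState { int downcount; };
static FakeMipsState mips;
static int downcountReg;            // stands in for DOWNCOUNTREG

static void FakeAdvance() {         // stands in for CoreTiming::Advance
    mips.downcount += 1000;         // Advance refills the in-memory downcount
}

static void OuterLoopIteration() {
    mips.downcount = downcountReg;  // SaveDowncount(): spill the register
    FakeAdvance();                  // the callee only sees MIPSState::downcount
    downcountReg = mips.downcount;  // reloaded after the call, as the real loop does
}

int main() {
    OuterLoopIteration();
    printf("downcount picked up from Advance: %d\n", downcountReg);  // prints 1000
    return 0;
}
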
@ -164,19 +164,22 @@ void Arm64Jit::GenerateFixedCode() {
// Debug
if (enableDebug) {
MOV(W0, DOWNCOUNTREG);
MOV(X1, MEMBASEREG);
MOV(X2, JITBASEREG);
QuickCallFunction(SCRATCH1, (void *)&ShowPC);
}
LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
LDR(SCRATCH1, MEMBASEREG, SCRATCH1_64);
ANDI2R(SCRATCH2, SCRATCH1, 0xFF000000); // rotation is to the right, in 2-bit increments.
ANDI2R(SCRATCH1, SCRATCH1, 0x00FFFFFF); // TODO: Replace this and the next op by a bit field extract
LSR(SCRATCH2, SCRATCH2, 24);
CMP(SCRATCH2, MIPS_EMUHACK_OPCODE>>24);
LSR(SCRATCH2, SCRATCH1, 24);
ANDI2R(SCRATCH1, SCRATCH1, 0x00FFFFFF);
CMP(SCRATCH2, MIPS_EMUHACK_OPCODE >> 24);
FixupBranch skipJump = B(CC_NEQ);
ADD(SCRATCH1_64, JITBASEREG, SCRATCH1_64);
BR(SCRATCH1_64);
SetJumpTarget(skipJump);
// No block found, let's jit
SaveDowncount();
RestoreRoundingMode(true);
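
To make the dispatcher hunk above easier to follow, here is a plain C++ sketch of the block-lookup scheme it implements. The assumption (as in the other PPSSPP backends) is that once a block is compiled, the opcode word in emulated RAM is replaced by an EMUHACK pseudo-opcode whose low 24 bits are the offset of the native code from the JIT base. The constant value and helper names below are illustrative.

#include <cstdint>
#include <cstring>
#include <cstdio>

// Illustrative constant; the real MIPS_EMUHACK_OPCODE lives in the MIPS headers.
static const uint32_t kEmuhackOpcode = 0x68000000;

// What the dispatcher's fast path does, in C++: if the opcode word at the
// emulated PC has been replaced by an emuhack marker, its low 24 bits are an
// offset from the JIT code base; otherwise the block still needs compiling.
static const uint8_t *LookupBlock(const uint8_t *membase, const uint8_t *jitbase, uint32_t pc) {
    uint32_t op;
    memcpy(&op, membase + pc, 4);              // LDR from MEMBASEREG + pc
    if ((op >> 24) != (kEmuhackOpcode >> 24))  // LSR #24, then CMP against the marker byte
        return nullptr;                        // skipJump: fall through to "let's jit"
    return jitbase + (op & 0x00FFFFFF);        // ANDI2R low 24 bits, ADD JITBASEREG, BR
}

int main() {
    uint8_t ram[16] = {};
    uint8_t jit[256] = {};
    uint32_t marked = kEmuhackOpcode | 0x40;   // pretend a block was compiled at jitbase + 0x40
    memcpy(ram + 8, &marked, 4);
    printf("pc=8 -> %s\n", LookupBlock(ram, jit, 8) == jit + 0x40 ? "jump to block" : "compile");
    printf("pc=0 -> %s\n", LookupBlock(ram, jit, 0) ? "jump to block" : "compile");
    return 0;
}
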
@ -204,15 +207,16 @@ void Arm64Jit::GenerateFixedCode() {
ABI_PopRegisters(regs_to_save);
RET();
// Don't forget to zap the instruction cache!
FlushIcache();
INFO_LOG(JIT, "THE DISASM : %p ========================", enterCode);
std::vector<std::string> lines = DisassembleArm64(enterCode, GetCodePtr() - enterCode);
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
INFO_LOG(JIT, "END OF THE DISASM : %p ========================", GetCodePtr());
// Don't forget to zap the instruction cache!
FlushIcache();
}
} // namespace MIPSComp

View File

@ -49,16 +49,277 @@ namespace MIPSComp
using namespace Arm64Gen;
using namespace Arm64JitConstants;
static u32 EvalOr(u32 a, u32 b) { return a | b; }
static u32 EvalEor(u32 a, u32 b) { return a ^ b; }
static u32 EvalAnd(u32 a, u32 b) { return a & b; }
static u32 EvalAdd(u32 a, u32 b) { return a + b; }
static u32 EvalSub(u32 a, u32 b) { return a - b; }
void Arm64Jit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg src, ARM64Reg src2), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b)) {
if (gpr.IsImm(rs)) {
gpr.SetImm(rt, (*eval)(gpr.GetImm(rs), uimm));
} else {
gpr.MapDirtyIn(rt, rs);
if (!(this->*tryArithI2R)(gpr.R(rt), gpr.R(rs), uimm)) {
gpr.SetRegImm(SCRATCH1, uimm);
(this->*arith)(gpr.R(rt), gpr.R(rs), SCRATCH1);
}
}
}
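
CompImmLogic picks between three strategies: fold the operation entirely when the source register's value is already known, emit a single instruction when the immediate is encodable, or materialize the immediate in SCRATCH1 and use the register-register form. A small sketch of that decision, assuming TryADDI2R performs roughly the encodability test below (the authoritative check lives in the emitter):

#include <cstdint>
#include <cstdio>

// Stand-in for what TryADDI2R checks before emitting a single instruction:
// AArch64 ADD/SUB take a 12-bit immediate, optionally shifted left by 12.
// (The logical ops use bitmask immediates instead, a different encoding.)
static bool CanEncodeAddSubImm(uint32_t imm) {
    return (imm & ~0xFFFu) == 0 || (imm & ~(0xFFFu << 12)) == 0;
}

int main() {
    const uint32_t imms[] = { 0x10, 0x7FF000, 0x12345 };
    for (uint32_t imm : imms) {
        // If the source register's value is itself known (gpr.IsImm), CompImmLogic
        // emits nothing at all and just records the folded result as an immediate.
        if (CanEncodeAddSubImm(imm))
            printf("0x%06X: one ADD with the immediate encoded directly\n", imm);
        else
            printf("0x%06X: materialize in SCRATCH1 (MOVZ/MOVK), then reg-reg ADD\n", imm);
    }
    return 0;
}
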
void Arm64Jit::Comp_IType(MIPSOpcode op) {
DISABLE;
CONDITIONAL_DISABLE;
s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension
u32 uimm = op & 0xFFFF;
u32 suimm = (u32)(s32)simm;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
// noop, won't write to ZERO.
if (rt == 0)
return;
switch (op >> 26) {
case 8: // same as addiu?
case 9: // R(rt) = R(rs) + simm; break; //addiu
CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd);
break;
/*
case 12: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::AND, &ARM64XEmitter::TryANDI2R, &EvalAnd); break;
case 13: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::ORR, &ARM64XEmitter::TryORRI2R, &EvalOr); break;
case 14: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::EOR, &ARM64XEmitter::TryEORI2R, &EvalEor); break;
*/
/*
case 10: // R(rt) = (s32)R(rs) < simm; break; //slti
{
if (gpr.IsImm(rs)) {
gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm ? 1 : 0);
break;
} else if (simm == 0) {
gpr.MapDirtyIn(rt, rs);
// Shift to get the sign bit only (for < 0.)
LSR(gpr.R(rt), gpr.R(rs), 31);
break;
}
gpr.MapDirtyIn(rt, rs);
if (!TryCMPI2R(gpr.R(rs), simm)) {
gpr.SetRegImm(SCRATCHREG1, simm);
CMP(gpr.R(rs), SCRATCHREG1);
}
SetCC(CC_LT);
MOVI2R(gpr.R(rt), 1);
SetCC(CC_GE);
MOVI2R(gpr.R(rt), 0);
SetCC(CC_AL);
}
break;
case 11: // R(rt) = R(rs) < suimm; break; //sltiu
{
if (gpr.IsImm(rs)) {
gpr.SetImm(rt, gpr.GetImm(rs) < suimm ? 1 : 0);
break;
}
gpr.MapDirtyIn(rt, rs);
if (!TryCMPI2R(gpr.R(rs), suimm)) {
gpr.SetRegImm(SCRATCHREG1, suimm);
CMP(gpr.R(rs), SCRATCHREG1);
}
SetCC(CC_LO);
MOVI2R(gpr.R(rt), 1);
SetCC(CC_HS);
MOVI2R(gpr.R(rt), 0);
SetCC(CC_AL);
}
break;*/
case 15: // R(rt) = uimm << 16; //lui
gpr.SetImm(rt, uimm << 16);
break;
default:
Comp_Generic(op);
break;
}
}
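
For reference, a worked example of the three immediate views computed at the top of Comp_IType, using lui $t0, 0x8090 (encoded as 0x3C088090):

#include <cstdint>
#include <cassert>

int main() {
    uint32_t op = 0x3C088090;                        // lui $t0, 0x8090
    uint32_t uimm = op & 0xFFFF;                     // 0x8090: zero-extended (andi/ori/xori/lui)
    int32_t simm = (int32_t)(int16_t)(op & 0xFFFF);  // -32624: sign-extended (addiu/slti)
    uint32_t suimm = (uint32_t)simm;                 // 0xFFFF8090: the unsigned view sltiu compares against
    assert(uimm == 0x8090u);
    assert(simm == -32624);
    assert(suimm == 0xFFFF8090u);
    assert((uimm << 16) == 0x80900000u);             // lui places the immediate in the upper half
    return 0;
}
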
void Arm64Jit::Comp_RType2(MIPSOpcode op) {
CONDITIONAL_DISABLE;
DISABLE;
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
// Don't change $zr.
if (rd == 0)
return;
switch (op & 63) {
case 22: //clz
if (gpr.IsImm(rs)) {
u32 value = gpr.GetImm(rs);
int x = 31;
int count = 0;
while (x >= 0 && !(value & (1 << x))) {
count++;
x--;
}
gpr.SetImm(rd, count);
break;
}
gpr.MapDirtyIn(rd, rs);
CLZ(gpr.R(rd), gpr.R(rs));
break;
case 23: //clo
if (gpr.IsImm(rs)) {
u32 value = gpr.GetImm(rs);
int x = 31;
int count = 0;
while (x >= 0 && (value & (1 << x))) {
count++;
x--;
}
gpr.SetImm(rd, count);
break;
}
gpr.MapDirtyIn(rd, rs);
MVN(SCRATCH1, gpr.R(rs));
CLZ(gpr.R(rd), SCRATCH1);
break;
default:
DISABLE;
}
}
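
The constant-folding branches above count leading zeros (clz) and leading ones (clo) by scanning down from bit 31. The same loop, extracted into a standalone cross-check:

#include <cstdint>
#include <cassert>

// Same loop body as the clz fold above.
static int CountLeadingZeros32(uint32_t value) {
    int x = 31, count = 0;
    while (x >= 0 && !(value & (1u << x))) {
        count++;
        x--;
    }
    return count;
}

int main() {
    assert(CountLeadingZeros32(0) == 32);            // no set bit: all 32 positions counted
    assert(CountLeadingZeros32(1) == 31);
    assert(CountLeadingZeros32(0x0000FFFF) == 16);
    assert(CountLeadingZeros32(0x80000000u) == 0);
    // clo(v) == clz(~v), which is why the non-constant clo path emits MVN + CLZ.
    assert(CountLeadingZeros32(~0xFF000000u) == 8);
    return 0;
}
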
void Arm64Jit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg rm, ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg rm, u32 val), u32(*eval)(u32 a, u32 b), bool symmetric) {
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));
return;
}
if (gpr.IsImm(rt) || (gpr.IsImm(rs) && symmetric)) {
MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs;
MIPSGPReg rhs = gpr.IsImm(rs) ? rs : rt;
u32 rhsImm = gpr.GetImm(rhs);
gpr.MapDirtyIn(rd, lhs);
if ((this->*tryArithI2R)(gpr.R(rd), gpr.R(lhs), rhsImm)) {
return;
}
// If rd is rhs, we may have lost it in the MapDirtyIn(). lhs was kept.
if (rd == rhs) {
// Luckily, it was just an imm.
gpr.SetImm(rhs, rhsImm);
}
}
// Can't do the RSB optimization on ARM64 - no RSB!
// Generic solution. If it's an imm, better to flush at this point.
gpr.MapDirtyInIn(rd, rs, rt);
(this->*arith)(gpr.R(rd), gpr.R(rs), gpr.R(rt));
}
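
CompType3's operand selection in isolation, with the register allocator and emitter abstracted away; this is a sketch only, not the PPSSPP API: "hasImm" stands in for gpr.IsImm() and TryEncode for the TryADDI2R-style helpers.

#include <cstdint>
#include <cstdio>

struct Operand { bool hasImm; uint32_t imm; };

static bool TryEncode(uint32_t imm) { return imm < 0x1000; }  // pretend only small imms encode

static uint32_t EvalAdd(uint32_t a, uint32_t b) { return a + b; }

static void Compile(Operand rs, Operand rt, bool symmetric, uint32_t (*eval)(uint32_t, uint32_t)) {
    if (rs.hasImm && rt.hasImm) {                  // both constant: fold, emit nothing
        printf("fold -> 0x%X\n", eval(rs.imm, rt.imm));
        return;
    }
    if (rt.hasImm || (rs.hasImm && symmetric)) {   // symmetric ops may swap the constant side in
        uint32_t imm = rt.hasImm ? rt.imm : rs.imm;
        if (TryEncode(imm)) {
            printf("reg op encoded-imm 0x%X\n", imm);
            return;
        }
    }
    printf("generic: map both operands to registers, reg-reg op\n");
}

int main() {
    Compile({true, 7}, {true, 3}, true, EvalAdd);          // fold -> 0xA
    Compile({false, 0}, {true, 0x10}, true, EvalAdd);      // encodable immediate
    Compile({true, 0x12345}, {false, 0}, false, EvalAdd);  // non-symmetric (sub): generic path
    Compile({false, 0}, {false, 0}, true, EvalAdd);        // generic path
    return 0;
}

Symmetric operations (add/and/or/xor) can move a known-constant rs into the immediate slot; sub cannot, which is why it is called with symmetric = false.
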
void Arm64Jit::Comp_RType3(MIPSOpcode op) {
DISABLE;
CONDITIONAL_DISABLE;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
// noop, won't write to ZERO.
if (rd == 0)
return;
switch (op & 63) {
case 10: //if (!R(rt)) R(rd) = R(rs); break; //movz
DISABLE;
break;
case 11:// if (R(rt)) R(rd) = R(rs); break; //movn
DISABLE;
break;
case 32: //R(rd) = R(rs) + R(rt); break; //add
case 33: //R(rd) = R(rs) + R(rt); break; //addu
// We optimize out 0 as an operand2 ADD.
DISABLE;
CompType3(rd, rs, rt, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd, true);
break;
case 34: //R(rd) = R(rs) - R(rt); break; //sub
case 35: //R(rd) = R(rs) - R(rt); break; //subu
DISABLE;
CompType3(rd, rs, rt, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub, false);
break;
case 36: //R(rd) = R(rs) & R(rt); break; //and
DISABLE;
CompType3(rd, rs, rt, &ARM64XEmitter::AND, &ARM64XEmitter::TryANDI2R, &EvalAnd, true);
break;
case 37: //R(rd) = R(rs) | R(rt); break; //or
DISABLE;
CompType3(rd, rs, rt, &ARM64XEmitter::ORR, &ARM64XEmitter::TryORRI2R, &EvalOr, true);
break;
case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor
DISABLE;
CompType3(rd, rs, rt, &ARM64XEmitter::EOR, &ARM64XEmitter::TryEORI2R, &EvalEor, true);
break;
case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor
DISABLE;
break;
case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt
DISABLE;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));
} else {
// TODO: Optimize imm cases
gpr.MapDirtyInIn(rd, rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
CSET(gpr.R(rd), CC_LT);
}
break;
case 43: //R(rd) = R(rs) < R(rt); break; //sltu
DISABLE;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));
} else {
gpr.MapDirtyInIn(rd, rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
CSET(gpr.R(rd), CC_LO);
}
break;
case 44: //R(rd) = max(R(rs), R(rt)); break; //max

DISABLE;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, std::max(gpr.GetImm(rs), gpr.GetImm(rt)));
break;
}
gpr.MapDirtyInIn(rd, rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rt), CC_GE);
break;
case 45: //R(rd) = min(R(rs), R(rt)); break; //min
DISABLE;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, std::min(gpr.GetImm(rs), gpr.GetImm(rt)));
break;
}
gpr.MapDirtyInIn(rd, rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rt), CC_LE);
break;
default:
Comp_Generic(op);
break;
}
}
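
The commented-out MIPS semantics in the case labels pin down what each operation must produce; the AArch64 mapping above is CMP + CSET (LT for slt, LO for sltu) and CMP + CSEL (GE for max, LE for min). Below is a small plain-C++ reference of those semantics, useful for cross-checking once the DISABLE'd cases are switched on; the signed behaviour of max/min follows the GE/LE conditions chosen above.

#include <cstdint>
#include <algorithm>
#include <cassert>

static uint32_t Slt(uint32_t rs, uint32_t rt)  { return (int32_t)rs < (int32_t)rt ? 1 : 0; }  // CMP + CSET(LT)
static uint32_t Sltu(uint32_t rs, uint32_t rt) { return rs < rt ? 1 : 0; }                    // CMP + CSET(LO)
static uint32_t Max(uint32_t rs, uint32_t rt)  { return std::max((int32_t)rs, (int32_t)rt); } // CMP + CSEL(GE)
static uint32_t Min(uint32_t rs, uint32_t rt)  { return std::min((int32_t)rs, (int32_t)rt); } // CMP + CSEL(LE)

int main() {
    assert(Slt(0xFFFFFFFFu, 0) == 1);     // -1 < 0 in the signed compare
    assert(Sltu(0xFFFFFFFFu, 0) == 0);    // but 0xFFFFFFFF is the largest unsigned value
    assert(Max(0xFFFFFFFFu, 1) == 1);     // signed max picks 1 over -1
    assert(Min(0xFFFFFFFFu, 1) == 0xFFFFFFFFu);
    return 0;
}

One thing worth noting: the immediate-fold branches for max/min above call std::max/std::min on unsigned values, while the register path uses signed conditions; if signed is the intended semantics, those folds will need s32 casts once the cases are enabled.
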
void Arm64Jit::Comp_ShiftType(MIPSOpcode op) {

View File

@ -74,8 +74,7 @@ Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), f
Arm64Jit::~Arm64Jit() {
}
void Arm64Jit::DoState(PointerWrap &p)
{
void Arm64Jit::DoState(PointerWrap &p) {
auto s = p.Section("Jit", 1, 2);
if (!s)
return;
@ -90,8 +89,7 @@ void Arm64Jit::DoState(PointerWrap &p)
}
// This is here so the savestate matches between jit and non-jit.
void Arm64Jit::DoDummyState(PointerWrap &p)
{
void Arm64Jit::DoDummyState(PointerWrap &p) {
auto s = p.Section("Jit", 1, 2);
if (!s)
return;
@ -111,8 +109,7 @@ void Arm64Jit::FlushAll()
FlushPrefixV();
}
void Arm64Jit::FlushPrefixV()
{
void Arm64Jit::FlushPrefixV() {
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
gpr.SetRegImm(SCRATCH1, js.prefixS);
STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
@ -132,21 +129,18 @@ void Arm64Jit::FlushPrefixV()
}
}
void Arm64Jit::ClearCache()
{
void Arm64Jit::ClearCache() {
ILOG("ARM64Jit: Clearing the cache!");
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
}
void Arm64Jit::InvalidateCache()
{
void Arm64Jit::InvalidateCache() {
blocks.Clear();
}
void Arm64Jit::InvalidateCacheAt(u32 em_address, int length)
{
void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) {
blocks.InvalidateICache(em_address, length);
}
@ -164,8 +158,7 @@ void Arm64Jit::EatInstruction(MIPSOpcode op) {
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Arm64Jit::CompileDelaySlot(int flags)
{
void Arm64Jit::CompileDelaySlot(int flags) {
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
// delay slot, we're screwed.
@ -277,11 +270,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC);
//MIPSInfo info = MIPSGetInfo(inst);
//if (info & IS_VFPU) {
// logBlocks = 1;
//}
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
@ -327,10 +316,9 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();
if (js.lastContinuedPC == 0)
if (js.lastContinuedPC == 0) {
b->originalSize = js.numInstructions;
else
{
} else {
// We continued at least once. Add the last proxy and set the originalSize correctly.
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
@ -339,8 +327,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
return b->normalEntry;
}
void Arm64Jit::AddContinuedBlock(u32 dest)
{
void Arm64Jit::AddContinuedBlock(u32 dest) {
// The first block is the root block. When we continue, we create proxy blocks after that.
if (js.lastContinuedPC == 0)
js.initialBlockSize = js.numInstructions;
@ -349,14 +336,12 @@ void Arm64Jit::AddContinuedBlock(u32 dest)
js.lastContinuedPC = dest;
}
bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) {
// TODO: Not used by anything yet.
return false;
}
void Arm64Jit::Comp_RunBlock(MIPSOpcode op)
{
void Arm64Jit::Comp_RunBlock(MIPSOpcode op) {
// This shouldn't be necessary, the dispatcher should catch us before we get here.
ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}
@ -403,6 +388,7 @@ void Arm64Jit::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
// Should not really be necessary except when entering Advance
void Arm64Jit::SaveDowncount() {
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
@ -436,8 +422,7 @@ void Arm64Jit::UpdateRoundingMode() {
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Arm64Jit::WriteExit(u32 destination, int exit_num)
{
void Arm64Jit::WriteExit(u32 destination, int exit_num) {
WriteDownCount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
@ -456,16 +441,14 @@ void Arm64Jit::WriteExit(u32 destination, int exit_num)
}
}
void Arm64Jit::WriteExitDestInR(ARM64Reg Reg)
{
void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) {
MovToPC(Reg);
WriteDownCount();
// TODO: shouldn't need an indirect branch here...
B((const void *)dispatcher);
}
void Arm64Jit::WriteSyscallExit()
{
void Arm64Jit::WriteSyscallExit() {
WriteDownCount();
B((const void *)dispatcherCheckCoreState);
}

View File

@ -237,7 +237,7 @@ private:
void BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely);
// Utilities to reduce duplicated code
void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg op2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b));
void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg src2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b));
void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arithOp2)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, u32 val), u32 (*eval)(u32 a, u32 b), bool symmetric = false);
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);