diff --git a/CMakeLists.txt b/CMakeLists.txt index 397d491bdd..2b521c8147 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,6 +128,7 @@ if(ARM) set(CommonExtra ${CommonExtra} Common/ArmABI.h Common/ArmABI.cpp + Common/ArmCPUDetect.cpp Common/ArmEmitter.h Common/ArmEmitter.cpp Common/ThunkARM.cpp) diff --git a/Common/ArmABI.cpp b/Common/ArmABI.cpp index 63824d808f..fa86924dfb 100644 --- a/Common/ArmABI.cpp +++ b/Common/ArmABI.cpp @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -47,6 +47,17 @@ void ARMXEmitter::ARMABI_CallFunctionCC(void *func, u32 Arg1, u32 Arg2) POP(5, R0, R1, R2, R3, _LR); } +void ARMXEmitter::ARMABI_CallFunctionCCC(void *func, u32 Arg1, u32 Arg2, u32 Arg3) +{ + ARMABI_MOVI2R(R14, Mem(func)); + PUSH(5, R0, R1, R2, R3, _LR); + ARMABI_MOVI2R(R0, Arg1); + ARMABI_MOVI2R(R1, Arg2); + ARMABI_MOVI2R(R2, Arg3); + BL(R14); + POP(5, R0, R1, R2, R3, _LR); +} + void ARMXEmitter::ARMABI_PushAllCalleeSavedRegsAndAdjustStack() { // Note: 4 * 4 = 16 bytes, so alignment is preserved. PUSH(4, R0, R1, R2, R3); @@ -55,6 +66,7 @@ void ARMXEmitter::ARMABI_PushAllCalleeSavedRegsAndAdjustStack() { void ARMXEmitter::ARMABI_PopAllCalleeSavedRegsAndAdjustStack() { POP(4, R0, R1, R2, R3); } + void ARMXEmitter::ARMABI_MOVI2R(ARMReg reg, Operand2 val) { // TODO: There are more fancy ways to save calls if we check if @@ -72,6 +84,7 @@ void ARMXEmitter::ARMABI_MOVI2M(Operand2 op, Operand2 val) MOVW(R12, op); MOVT(R12, op, true); STR(R12, R14); // R10 is what we want to store } + const char *conditions[] = {"EQ", "NEQ", "CS", "CC", "MI", "PL", "VS", "VC", "HI", "LS", "GE", "LT", "GT", "LE", "AL" }; static void ShowCondition(u32 cond) { diff --git a/Common/ArmABI.h b/Common/ArmABI.h index 1646d10682..5f27879700 100644 --- a/Common/ArmABI.h +++ b/Common/ArmABI.h @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/Common/ArmCPUDetect.cpp b/Common/ArmCPUDetect.cpp new file mode 100644 index 0000000000..1e01732e23 --- /dev/null +++ b/Common/ArmCPUDetect.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "CPUDetect.h" +#include "StringUtil.h" + +const char procfile[] = "/proc/cpuinfo"; + +char *GetCPUString() +{ + const char marker[] = "Hardware\t: "; + char *cpu_string = 0; + // Look for the Hardware line in /proc/cpuinfo + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return 0; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + cpu_string = buf + sizeof(marker) - 1; + cpu_string = strndup(cpu_string, strlen(cpu_string) - 1); // Strip the newline + break; + } + return cpu_string; +} +bool CheckCPUFeature(const char *feature) +{ + const char marker[] = "Features\t: "; + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return false; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + char *featurestring = buf + sizeof(marker) - 1; + char *token = strtok(featurestring, " "); + while (token != NULL) + { + if (strstr(token, feature)) + return true; + token = strtok(NULL, " "); + } + } + return false; +} +int GetCoreCount() +{ + const char marker[] = "processor\t: "; + int cores = 0; + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return 0; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + ++cores; + } + return cores; +} + +CPUInfo cpu_info; + +CPUInfo::CPUInfo() { + Detect(); +} + +// Detects the various cpu features +void CPUInfo::Detect() +{ + // Set some defaults here + // When ARMv8 cpus come out, these need to be updated. + HTT = false; + OS64bit = false; + CPU64bit = false; + Mode64bit = false; + vendor = VENDOR_ARM; + + // Get the information about the CPU + strncpy(cpu_string, GetCPUString(), sizeof(cpu_string)); + num_cores = GetCoreCount(); + bSwp = CheckCPUFeature("swp"); + bHalf = CheckCPUFeature("half"); + bThumb = CheckCPUFeature("thumb"); + bFastMult = CheckCPUFeature("fastmult"); + bVFP = CheckCPUFeature("vfp"); + bEDSP = CheckCPUFeature("edsp"); + bThumbEE = CheckCPUFeature("thumbee"); + bNEON = CheckCPUFeature("neon"); + bVFPv3 = CheckCPUFeature("vfpv3"); + bTLS = CheckCPUFeature("tls"); + bVFPv4 = CheckCPUFeature("vfpv4"); + bIDIVa = CheckCPUFeature("idiva"); + bIDIVt = CheckCPUFeature("idivt"); + // These two are ARMv8 specific.
+ bFP = CheckCPUFeature("fp"); + bASIMD = CheckCPUFeature("asimd"); +} + +// Turn the cpu info into a string we can show +std::string CPUInfo::Summarize() +{ + std::string sum; + if (num_cores == 1) + sum = StringFromFormat("%s, %i core", cpu_string, num_cores); + else + sum = StringFromFormat("%s, %i cores", cpu_string, num_cores); + + if (bSwp) sum += ", SWP"; + if (bHalf) sum += ", Half"; + if (bThumb) sum += ", Thumb"; + if (bFastMult) sum += ", FastMult"; + if (bVFP) sum += ", VFP"; + if (bEDSP) sum += ", EDSP"; + if (bThumbEE) sum += ", ThumbEE"; + if (bNEON) sum += ", NEON"; + if (bVFPv3) sum += ", VFPv3"; + if (bTLS) sum += ", TLS"; + if (bVFPv4) sum += ", VFPv4"; + if (bIDIVa) sum += ", IDIVa"; + if (bIDIVt) sum += ", IDIVt"; + + return sum; +} diff --git a/Common/ArmEmitter.cpp b/Common/ArmEmitter.cpp index 17ef8e711f..5de8fb6d7b 100644 --- a/Common/ArmEmitter.cpp +++ b/Common/ArmEmitter.cpp @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -61,7 +61,7 @@ const u8 *ARMXEmitter::AlignCodePage() void ARMXEmitter::Flush() { - __builtin___clear_cache (startcode, code); + __clear_cache (startcode, code); SLEEP(0); } void ARMXEmitter::SetCC(CCFlags cond) @@ -315,6 +315,22 @@ void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, b } // Data Operations +void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2) +{ + Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2); +} +void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) +{ + if (!cpu_info.bIDIVa) + PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); + WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend); +} +void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) +{ + if (!cpu_info.bIDIVa) + PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); + WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend); +} void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);} void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);} void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);} @@ -428,6 +444,99 @@ void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...) 
va_end(vl); WriteRegStoreOp(0x89, dest, WriteBack, RegList); } + +// NEON and ASIMD +void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, Operand2 op) +{ + _assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); + _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); + bool single_reg = Dest < D0; + if (single_reg) + Dest = (ARMReg)(Dest - S0); + else + Dest = (ARMReg)(Dest - D0); + Write32(NO_COND | (13 << 24) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ + | (5 << 9) | (!single_reg << 8) | op.Imm8()); +} +void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) +{ + if (Dest > R15) + { + if (Src < S0) + { + if (Dest < D0) + { + // Moving to a Neon register FROM ARM Reg + Dest = (ARMReg)(Dest - S0); + Write32(NO_COND | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \ + | (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4)); + return; + } + else + { + // Move 64bit from Arm reg + _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM to NEON"); + } + } + } + else + { + if (Src > R15) + { + if (Src < D0) + { + // Moving to ARM Reg from Neon Register + Src = (ARMReg)(Src - S0); + Write32(NO_COND | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \ + | (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4)); + + return; + } + else + { + // Move 64bit To Arm reg + _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM From NEON"); + } + } + else + { + // Move Arm reg to Arm reg + _assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers"); + } + } + // Moving NEON registers + int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4; + int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4; + bool Single = DestSize == 1; + _assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes"); + if (Single) + { + Dest = (ARMReg)(Dest - S0); + Src = (ARMReg)(Src - S0); + Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \ + | (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1)); + } + else + { + // Double and quad + bool Quad = DestSize == 4; + if (Quad) + { + // Gets encoded as a Double register + Dest = (ARMReg)((Dest - Q0) * 2); + Src = (ARMReg)((Src - Q0) * 2); + } + else + { + Dest = (ARMReg)(Dest - D0); + Src = (ARMReg)(Src - D0); + } + Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (1 << 21) | ((Src & 0xF) << 16) \ + | ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (Quad << 6) \ + | ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF)); + } +} + // helper routines for setting pointers void ARMXEmitter::CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) { diff --git a/Common/ArmEmitter.h b/Common/ArmEmitter.h index 0af0e636aa..a452e6fd4d 100644 --- a/Common/ArmEmitter.h +++ b/Common/ArmEmitter.h @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. 
// This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -23,11 +23,12 @@ #include "Common.h" #include "MemoryUtil.h" -#undef _SP - namespace ArmGen { - +#undef _IP +#undef _SP +#undef _LR +#undef _PC enum ARMReg { // GPRs @@ -42,17 +43,21 @@ enum ARMReg // VFP single precision registers - S0 = 0, S1, S2, S3, S4, S5, S6, + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, // VFP Double Precision registers - D0 = 0, D1, D2, D3, D4, D5, D6, D7, + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, + + // ASIMD Quad-Word registers + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, INVALID_REG = 0xFFFFFFFF }; @@ -80,12 +85,12 @@ const u32 NO_COND = 0xE0000000; enum ShiftType { - LSL = 0, - ASL = 0, - LSR = 1, - ASR = 2, - ROR = 3, - RRX = 4 + ST_LSL = 0, + ST_ASL = 0, + ST_LSR = 1, + ST_ASR = 2, + ST_ROR = 3, + ST_RRX = 4 }; enum @@ -147,7 +152,7 @@ public: Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR { Type = TYPE_RSR; - _assert_msg_(DYNA_REC, type != RRX, "Invalid Operand2: RRX does not take a register shift amount"); + _assert_msg_(DYNA_REC, type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount"); IndexOrShift = shift; Shift = type; Value = base; @@ -158,31 +163,31 @@ public: if(shift == 32) shift = 0; switch (type) { - case LSL: + case ST_LSL: _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSL %u", shift); break; - case LSR: + case ST_LSR: _assert_msg_(DYNA_REC, shift <= 32, "Invalid Operand2: LSR %u", shift); if (!shift) - type = LSL; + type = ST_LSL; if (shift == 32) shift = 0; break; - case ASR: + case ST_ASR: _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSR %u", shift); if (!shift) - type = LSL; + type = ST_LSL; if (shift == 32) shift = 0; break; - case ROR: + case ST_ROR: _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ROR %u", shift); if (!shift) - type = LSL; + type = ST_LSL; break; - case RRX: + case ST_RRX: _assert_msg_(DYNA_REC, shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount"); - type = ROR; + type = ST_ROR; break; } IndexOrShift = shift; @@ -228,6 +233,11 @@ public: _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm5 not IMM value"); return ((Value & 0x0000001F) << 7); } + const u32 Imm8() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); + return Value & 0xFF; + } const u32 Imm8Rot() // IMM8 with Rotation { _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); @@ -269,6 +279,17 @@ public: _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); return (Value & 0x0FFFFFFF); } + // NEON and ASIMD specific + const u32 Imm8ASIMD() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8ASIMD not IMM"); + return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF); + } + const u32 Imm8VFP() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8VFP not IMM"); + return ((Value & 0xF0) << 12) | (Value & 0xF); + } }; @@ -300,6 +321,7 @@ private: void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList); void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2); void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2); + void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg 
r1, ARMReg r2); // New Ops void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false); @@ -385,13 +407,17 @@ public: void ORRS(ARMReg dest, ARMReg src, Operand2 op2); void MOV (ARMReg dest, Operand2 op2); void MOVS(ARMReg dest, Operand2 op2); - void BIC (ARMReg dest, ARMReg src, Operand2 op2); + void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN void BICS(ARMReg dest, ARMReg src, Operand2 op2); void MVN (ARMReg dest, Operand2 op2); void MVNS(ARMReg dest, Operand2 op2); void MOVW(ARMReg dest, Operand2 op2); void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false); + // UDIV and SDIV are only available on CPUs that have + // the idiva hardware capability + void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); + void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); void MUL (ARMReg dest, ARMReg src, ARMReg op2); void MULS(ARMReg dest, ARMReg src, ARMReg op2); @@ -422,18 +448,29 @@ public: // dest contains the result if the instruction managed to store the value void STREX(ARMReg dest, ARMReg base, ARMReg op); void DMB (); - + + // NEON and ASIMD instructions + // None of these will be created with conditional since ARM + // is deprecating conditional execution of ASIMD instructions. + // Some ASIMD instructions don't even have a conditional encoding. + + void VLDR(ARMReg dest, ARMReg Base, Operand2 op); + void VMOV(ARMReg Dest, ARMReg Src); + // Utility functions // The difference between this and CALL is that this aligns the stack // where appropriate. void ARMABI_CallFunction(void *func); void ARMABI_CallFunctionC(void *func, u32 Arg0); void ARMABI_CallFunctionCC(void *func, u32 Arg1, u32 Arg2); + void ARMABI_CallFunctionCCC(void *func, u32 Arg1, u32 Arg2, u32 Arg3); void ARMABI_PushAllCalleeSavedRegsAndAdjustStack(); void ARMABI_PopAllCalleeSavedRegsAndAdjustStack(); void ARMABI_MOVI2R(ARMReg reg, Operand2 val); void ARMABI_MOVI2M(Operand2 op, Operand2 val); + void ARMABI_MOVI2M(u32 addr, Operand2 val); void ARMABI_ShowConditions(); + void ARMABI_Return(); void UpdateAPSR(bool NZCVQ, u8 Flags, bool GE, u8 GEval); diff --git a/Common/CPUDetect.cpp b/Common/CPUDetect.cpp index ea04ca0704..219b0f5bd7 100644 --- a/Common/CPUDetect.cpp +++ b/Common/CPUDetect.cpp @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -30,7 +30,9 @@ #else //#include +#ifndef _M_GENERIC #include +#endif #if defined __FreeBSD__ #include @@ -39,7 +41,9 @@ static inline void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -#ifdef _LP64 +#if defined _M_GENERIC + (*eax) = (*ebx) = (*ecx) = (*edx) = 0; +#elif defined _LP64 // Note: EBX is reserved on Mac OS X and in PIC on Linux, so it has to // restored at the end of the asm block.
__asm__ ( @@ -93,10 +97,6 @@ CPUInfo::CPUInfo() { Detect(); } -#ifdef _WIN32 -#include -#endif - // Detects the various cpu features void CPUInfo::Detect() { diff --git a/Common/CPUDetect.h b/Common/CPUDetect.h index 5b98ab4805..858c57e680 100644 --- a/Common/CPUDetect.h +++ b/Common/CPUDetect.h @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -25,7 +25,8 @@ enum CPUVendor { VENDOR_INTEL = 0, VENDOR_AMD = 1, - VENDOR_OTHER = 2, + VENDOR_ARM = 2, + VENDOR_OTHER = 3, }; struct CPUInfo @@ -55,6 +56,24 @@ struct CPUInfo bool bAES; bool bLAHFSAHF64; bool bLongMode; + + // ARM specific CPUInfo + bool bSwp; + bool bHalf; + bool bThumb; + bool bFastMult; + bool bVFP; + bool bEDSP; + bool bThumbEE; + bool bNEON; + bool bVFPv3; + bool bTLS; + bool bVFPv4; + bool bIDIVa; + bool bIDIVt; + // ARMv8 specific + bool bFP; + bool bASIMD; // Call Detect() explicit CPUInfo(); diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index c61737636a..6417d93216 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -203,6 +203,12 @@ true true + + true + true + true + true + true true diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 93673cb372..8e09f10993 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -74,6 +74,7 @@ + diff --git a/Common/ThunkARM.cpp b/Common/ThunkARM.cpp index b470f868e8..98cee1683d 100644 --- a/Common/ThunkARM.cpp +++ b/Common/ThunkARM.cpp @@ -2,7 +2,7 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/Core/MIPS/ARM/Asm.cpp b/Core/MIPS/ARM/Asm.cpp index da68215c7b..9072857f8a 100644 --- a/Core/MIPS/ARM/Asm.cpp +++ b/Core/MIPS/ARM/Asm.cpp @@ -81,6 +81,7 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) #endif */ + PUSH(8, R5, R6, R7, R8, R9, R10, R11, _LR); SetCC(CC_AL); //ARMABI_MOVIMM32(R11, (u32)Memory::base); //ARMABI_MOVIMM32(R10, (u32)jit->GetBlockCache()->GetCodePointers()); @@ -107,26 +108,23 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) //ARMABI_MOVIMM32(R1, Memory::MEMVIEW32_MASK + 1); AND(R0, R0, R1); LDR(R0, R11, R0); - MOV(R1, R0); - //AND(R1, R1, Imm32(MIPS_EMUHACK_MASK)); - //AND(R0, R0, Imm32(MIPS_EMUHACK_VALUE_MASK)); - //CMP(R0, Imm32(MIPS_EMUHACK_OPCODE)); - SetCC(CC_NEQ); - FixupBranch notfound = B(); - SetCC(CC_AL); + AND(R1, R0, Operand2(0xFC, 24)); + BIC(R0, R0, Operand2(0xFC, 24)); + CMP(R1, Operand2(MIPS_EMUHACK_OPCODE >> 24, 24)); + FixupBranch notfound = B_CC(CC_NEQ); // IDEA - we have 24 bits, why not just use offsets from base of code? 
if (enableDebug) { - // ADD(32, M(&mips->debugCount), Imm8(1)); + //ADD(32, M(&mips->debugCount), Imm8(1)); } - //grab from list and jump to it - // ADD(R0, R10, LSL(R0, 2)); + // grab from list and jump to it + ADD(R0, R10, Operand2(2, ST_LSL, R0)); + LDR(R0, R0); B(R0); SetJumpTarget(notfound); - //BL(&Jit); - - //B(dispatcherNoCheck); // no point in special casing this + ARMABI_CallFunction((void *)&Jit); + B(dispatcherNoCheck); // no point in special casing this SetJumpTarget(bail); doTiming = GetCodePtr(); @@ -148,11 +146,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) //ARMABI_PopAllCalleeSavedRegsAndAdjustStack(); B(_LR); - breakpointBailout = GetCodePtr(); + PUSH(8, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns + - //Landing pad for drec space - //ARMABI_PopAllCalleeSavedRegsAndAdjustStack(); - //RET(); GenerateCommon(); } diff --git a/Core/MIPS/ARM/Asm.h b/Core/MIPS/ARM/Asm.h index 4897d885f4..481e46374b 100644 --- a/Core/MIPS/ARM/Asm.h +++ b/Core/MIPS/ARM/Asm.h @@ -72,10 +72,7 @@ public: const u8 *fpException; const u8 *testExceptions; const u8 *testExternalExceptions; - const u8 *dispatchPcInEAX; const u8 *doTiming; - - const u8 *breakpointBailout; }; #endif // _JIT64ASM_H diff --git a/Core/MIPS/ARM/CompALU.cpp b/Core/MIPS/ARM/CompALU.cpp index da608b6655..5560da52e4 100644 --- a/Core/MIPS/ARM/CompALU.cpp +++ b/Core/MIPS/ARM/CompALU.cpp @@ -151,10 +151,7 @@ namespace MIPSComp int rs = _RS; int rd = _RD; - gpr.Lock(rd, rs, rt); - gpr.BindToRegister(rs, true, false); - gpr.BindToRegister(rt, true, false); - gpr.BindToRegister(rd, true, true); + // gpr.Lock(rd, rs, rt); switch (op & 63) { @@ -163,42 +160,42 @@ namespace MIPSComp // case 32: //R(rd) = R(rs) + R(rt); break; //add case 33: //R(rd) = R(rs) + R(rt); break; //addu - ADD(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); + ADD(gpr.R(rd), gpr.R(rs), gpr.R(rt)); break; case 134: //R(rd) = R(rs) - R(rt); break; //sub case 135: - SUB(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); + SUB(gpr.R(rd), gpr.R(rs), gpr.R(rt)); break; case 136: //R(rd) = R(rs) & R(rt); break; //and - AND(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); + AND(gpr.R(rd), gpr.R(rs), gpr.R(rt)); break; case 137: //R(rd) = R(rs) | R(rt); break; //or - ORR(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); + ORR(gpr.R(rd), gpr.R(rs), gpr.R(rt)); break; case 138: //R(rd) = R(rs) ^ R(rt); break; //xor/eor - EOR(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); + EOR(gpr.R(rd), gpr.R(rs), gpr.R(rt)); break; case 39: // R(rd) = ~(R(rs) | R(rt)); //nor - ORR(gpr.RX(rd), gpr.RX(rs), gpr.RX(rt)); - MVN(gpr.RX(rd), gpr.RX(rd)); + ORR(gpr.R(rd), gpr.R(rs), gpr.R(rt)); + MVN(gpr.R(rd), gpr.R(rd)); break; case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt - CMP(gpr.RX(rs), gpr.RX(rt)); + CMP(gpr.R(rs), gpr.R(rt)); SetCC(CC_LT); - ARMABI_MOVI2R(gpr.RX(rd), 1); + ARMABI_MOVI2R(gpr.R(rd), 1); SetCC(CC_GE); - ARMABI_MOVI2R(gpr.RX(rd), 0); + ARMABI_MOVI2R(gpr.R(rd), 0); SetCC(CC_AL); break; case 43: //R(rd) = R(rs) < R(rt); break; //sltu - CMP(gpr.RX(rs), gpr.RX(rt)); + CMP(gpr.R(rs), gpr.R(rt)); SetCC(CC_LO); - ARMABI_MOVI2R(gpr.RX(rd), 1); + ARMABI_MOVI2R(gpr.R(rd), 1); SetCC(CC_HS); - ARMABI_MOVI2R(gpr.RX(rd), 0); + ARMABI_MOVI2R(gpr.R(rd), 0); SetCC(CC_AL); break; @@ -209,11 +206,11 @@ namespace MIPSComp // CMP(a,b); CMOVGT(a,b) default: - gpr.UnlockAll(); + // gpr.UnlockAll(); Comp_Generic(op); break; } - gpr.UnlockAll(); + // gpr.UnlockAll(); } diff --git a/Core/MIPS/ARM/CompBranch.cpp b/Core/MIPS/ARM/CompBranch.cpp index e9112d5fcb..7c110ff4cd 100644 --- a/Core/MIPS/ARM/CompBranch.cpp +++ 
b/Core/MIPS/ARM/CompBranch.cpp @@ -41,8 +41,8 @@ using namespace MIPSAnalyst; namespace MIPSComp { - /* -void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) + +void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely) { int offset = (signed short)(op&0xFFFF)<<2; int rt = _RT; @@ -61,28 +61,32 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) if (rs == 0) { - CMP(32, gpr.R(rt), Imm32(0)); + CMP(gpr.R(rs), Operand2(0)); } else { - gpr.BindToRegister(rs, true, false); - CMP(32, gpr.R(rs), rt == 0 ? Imm32(0) : gpr.R(rt)); + CMP(gpr.R(rs), gpr.R(rt)); } FlushAll(); js.inDelaySlot = true; - Gen::FixupBranch ptr; + ArmGen::FixupBranch ptr; if (!likely) { - PUSHF(); // preserve flag around the delay slot! + // preserve flag around the delay slot! Maybe this is not always necessary on ARM where + // we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the + // delay slot, we're screwed. + MRS(R0); // Save flags register + PUSH(1, R0); CompileAt(js.compilerPC + 4); FlushAll(); - POPF(); // restore flag! - ptr = J_CC(cc, true); + POP(1, R0); + _MSR(false, false, R0); // Restore flags register + ptr = B_CC(cc); } else { - ptr = J_CC(cc, true); + ptr = B_CC(cc); CompileAt(js.compilerPC + 4); FlushAll(); } @@ -97,9 +101,9 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) js.compiling = false; } -*/ - /* -void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool likely) + + +void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool likely) { int offset = (signed short)(op&0xFFFF)<<2; int rs = _RS; @@ -110,26 +114,31 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool likely) bool delaySlotIsNice = GetOutReg(delaySlotOp) != rs; if (!delaySlotIsNice) { - ERROR_LOG(CPU, "Not nice delay slot in BranchRSZeroComp :( %08x", js.compilerPC); + // ERROR_LOG(CPU, "Not nice delay slot in BranchRSZeroComp :( %08x", js.compilerPC); } - gpr.BindToRegister(rs, true, false); - CMP(32, gpr.R(rs), Imm32(0)); + CMP(gpr.R(rs), Operand2(0)); FlushAll(); - Gen::FixupBranch ptr; js.inDelaySlot = true; - if (!likely) + ArmGen::FixupBranch ptr; + if (!likely) { - PUSHF(); // preserve flag around the delay slot! Better hope the delay slot instruction doesn't need to fall back to interpreter... + // preserve flag around the delay slot! Maybe this is not always necessary on ARM where + // we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the + // delay slot, we're screwed. + MRS(R0); // Save flags register + PUSH(1, R0); CompileAt(js.compilerPC + 4); FlushAll(); - POPF(); // restore flag! - ptr = J_CC(cc, true); + + POP(1, R0); + _MSR(false, false, R0); // Restore flags register + ptr = B_CC(cc); } else { - ptr = J_CC(cc, true); + ptr = B_CC(cc); CompileAt(js.compilerPC + 4); FlushAll(); } @@ -143,54 +152,50 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool likely) WriteExit(js.compilerPC + 8, 1); js.compiling = false; } -*/ + void Jit::Comp_RelBranch(u32 op) { - /* + // The CC flags here should be opposite of the actual branch becuase they skip the branching action. 
switch (op>>26) { - case 4: BranchRSRTComp(op, CC_NZ, false); break;//beq - case 5: BranchRSRTComp(op, CC_Z, false); break;//bne + case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq + case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne - case 6: BranchRSZeroComp(op, CC_G, false); break;//blez + case 6: BranchRSZeroComp(op, CC_GT, false); break;//blez case 7: BranchRSZeroComp(op, CC_LE, false); break;//bgtz - case 20: BranchRSRTComp(op, CC_NZ, true); break;//beql - case 21: BranchRSRTComp(op, CC_Z, true); break;//bnel + case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql + case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel - case 22: BranchRSZeroComp(op, CC_G, true); break;//blezl + case 22: BranchRSZeroComp(op, CC_GT, true); break;//blezl case 23: BranchRSZeroComp(op, CC_LE, true); break;//bgtzl default: _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - */ js.compiling = false; } void Jit::Comp_RelBranchRI(u32 op) { - /* switch ((op >> 16) & 0x1F) { case 0: BranchRSZeroComp(op, CC_GE, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz - case 1: BranchRSZeroComp(op, CC_L, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez + case 1: BranchRSZeroComp(op, CC_LT, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez case 2: BranchRSZeroComp(op, CC_GE, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl - case 3: BranchRSZeroComp(op, CC_L, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl + case 3: BranchRSZeroComp(op, CC_LT, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl default: _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - */ js.compiling = false; } // If likely is set, discard the branch slot if NOT taken. -/* -void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely) +void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely) { int offset = (signed short)(op & 0xFFFF) << 2; u32 targetAddr = js.compilerPC + offset + 4; @@ -204,20 +209,27 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely) } FlushAll(); - TEST(32, M((void *)&(mips_->fpcond)), Imm32(1)); - Gen::FixupBranch ptr; + ARMABI_MOVI2R(R0, (u32)&(mips_->fpcond)); + LDR(R0, R0, Operand2(0, TYPE_IMM)); + TST(R0, Operand2(1, TYPE_IMM)); + ArmGen::FixupBranch ptr; js.inDelaySlot = true; if (!likely) { - PUSHF(); // preserve flag around the delay slot! + MRS(R0); // Save flags register + PUSH(1, R0); + CompileAt(js.compilerPC + 4); FlushAll(); - POPF(); // restore flag! - ptr = J_CC(cc, true); + + // POPF(); // restore flag! 
+ POP(1, R0); + _MSR(false, false, R0); // Restore flags register + ptr = B_CC(cc); } else { - ptr = J_CC(cc, true); + ptr = B_CC(cc); CompileAt(js.compilerPC + 4); FlushAll(); } @@ -229,26 +241,21 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely) SetJumpTarget(ptr); // Not taken WriteExit(js.compilerPC + 8, 1); - js.compiling = false; } -*/ - void Jit::Comp_FPUBranch(u32 op) { - /* switch((op >> 16) & 0x1f) { - case 0: BranchFPFlag(op, CC_NZ, false); break; // bc1f - case 1: BranchFPFlag(op, CC_Z, false); break; // bc1t - case 2: BranchFPFlag(op, CC_NZ, true); break; // bc1fl - case 3: BranchFPFlag(op, CC_Z, true); break; // bc1tl + case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f + case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t + case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl + case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl default: _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); break; } - */ js.compiling = false; } @@ -274,13 +281,12 @@ void Jit::Comp_VBranch(u32 op) //case 3: if ( val) DelayBranchTo(addr); else PC += 8; break; //bvtl //TODO } - js.compiling = false; - */ + */ + js.compiling = false; } void Jit::Comp_Jump(u32 op) { - /* u32 off = ((op & 0x3FFFFFF) << 2); u32 targetAddr = (js.compilerPC & 0xF0000000) | off; //Delay slot @@ -294,7 +300,9 @@ void Jit::Comp_Jump(u32 op) break; case 3: //jal - MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8)); // Save return address + ARMABI_MOVI2R(R0, Operand2(js.compilerPC + 8, TYPE_IMM)); + ARMABI_MOVI2R(R1, Operand2((u32)&mips_->r[MIPS_REG_RA], TYPE_IMM)); + STR(R1, R0); WriteExit(targetAddr, 0); break; @@ -303,54 +311,55 @@ void Jit::Comp_Jump(u32 op) break; } js.compiling = false; - */ } void Jit::Comp_JumpReg(u32 op) { - /* - int rs = _RS; - u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4); - bool delaySlotIsNice = GetOutReg(delaySlotOp) != rs; + bool delaySlotIsNice = GetOutReg(delaySlotOp) != _RS; // Do what with that information? - gpr.BindToRegister(rs, true, false); - PUSH(32, gpr.R(rs)); + ARMReg rs = gpr.R(_RS); + + // Delay slot + PUSH(1, rs); // Save the destination address through the delay slot. Could use isNice to avoid CompileAt(js.compilerPC + 4); FlushAll(); - POP(32, R(EAX)); + POP(1, R0); switch (op & 0x3f) { case 8: //jr break; case 9: //jalr - MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8)); + ARMABI_MOVI2R(R1, (u32)&mips_->r[MIPS_REG_RA]); + ARMABI_MOVI2R(R2, js.compilerPC + 8); + STR(R1, R2); + // MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8)); break; default: _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - WriteExitDestInEAX(); - js.compiling = false; - */ + WriteExitDestInR(R0); + js.compiling = false; } void Jit::Comp_Syscall(u32 op) { - /* FlushAll(); - MOV(32, R(EAX), M(&mips_->r[MIPS_REG_RA])); - MOV(32, M(&mips_->pc), R(EAX)); + // By putting mips_ in a register and using offsets, we could get rid of one of the constant-sets. 
+ ARMABI_MOVI2R(R0, (u32)&mips_->r[MIPS_REG_RA]); + LDR(R0, R0); + ARMABI_MOVI2R(R1, (u32)&mips_->pc); + STR(R1, R0); - ABI_CallFunctionC(&CallSyscall, op); - - WriteSyscallExit();*/ + ARMABI_CallFunctionC((void *)&CallSyscall, op); + WriteSyscallExit(); } } // namespace Mipscomp diff --git a/Core/MIPS/ARM/Jit.cpp b/Core/MIPS/ARM/Jit.cpp index c1e55cada4..d3c33f61c7 100644 --- a/Core/MIPS/ARM/Jit.cpp +++ b/Core/MIPS/ARM/Jit.cpp @@ -62,20 +62,20 @@ void MovToReg(int reg, u32 value) } */ -Jit::Jit(MIPSState *mips) : blocks(mips), mips_(mips) +Jit::Jit(MIPSState *mips) : blocks(mips), gpr(mips), mips_(mips) { blocks.Init(); asm_.Init(mips, this); gpr.SetEmitter(this); - fpr.SetEmitter(this); + //fpr.SetEmitter(this); AllocCodeSpace(1024 * 1024 * 16); } void Jit::FlushAll() { - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); + gpr.Flush(); + //fpr.Flush(FLUSH_ALL); } void Jit::ClearCache() @@ -124,8 +124,8 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) // TODO: this needs work MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address); - gpr.Start(mips_, analysis); - fpr.Start(mips_, analysis); + gpr.Start(analysis); + //fpr.Start(mips_, analysis); int numInstructions = 0; int cycles = 0; @@ -164,10 +164,42 @@ void Jit::Comp_Generic(u32 op) } } +void Jit::DoDownCount() +{ + ARMReg A = gpr.GetReg(); + ARMReg B = gpr.GetReg(); + ARMABI_MOVI2R(A, Mem(&CoreTiming::downcount)); + LDR(B, A); + if(js.downcountAmount < 255) // We can enlarge this if we used rotations + { + SUBS(B, B, js.downcountAmount); + STR(A, B); + } + else + { + ARMReg C = gpr.GetReg(false); + ARMABI_MOVI2R(C, js.downcountAmount); + SUBS(B, B, C); + STR(A, B); + } + gpr.Unlock(A, B); +} + +void Jit::WriteExitDestInR(ARMReg Reg) +{ + ARMReg A = gpr.GetReg(); + ARMABI_MOVI2R(A, (u32)&mips_->pc); + STR(A, Reg); + gpr.Unlock(Reg); // This was locked in the instruction beforehand. + DoDownCount(); + ARMABI_MOVI2R(A, (u32)asm_.dispatcher); + B(A); + gpr.Unlock(A); +} + void Jit::WriteExit(u32 destination, int exit_num) { - //SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - + DoDownCount(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; b->exitAddress[exit_num] = destination; @@ -178,41 +210,28 @@ void Jit::WriteExit(u32 destination, int exit_num) if (block >= 0 && jo.enableBlocklink) { // It exists! Joy of joy! - //JMP(blocks.GetBlock(block)->checkedEntry, true); + B(blocks.GetBlock(block)->checkedEntry); b->linkStatus[exit_num] = true; } else { - //MOV(32, M(&mips_->pc), Imm32(destination)); - //JMP(asm_.dispatcher, true); + ARMABI_MOVI2R(R0, (u32)&mips_->pc); // Watch out! This uses R14 and R12! + ARMABI_MOVI2R(R1, destination); // Watch out! This uses R14 and R12! + STR(R0, R1); // Watch out! This uses R14 and R12! + ARMReg A = gpr.GetReg(false); + ARMABI_MOVI2R(A, (u32)asm_.dispatcher); + B(A); } } -void Jit::WriteExitDestInEAX() -{ - /* - MOV(32, M(&mips_->pc), R(EAX)); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_.dispatcher, true); - */ -} -/* -void Jit::WriteRfiExitDestInEAX() -{ - MOV(32, M(&mips_->pc), R(EAX)); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_routines.testExceptions, true); -}*/ - void Jit::WriteSyscallExit() { - - /* - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? 
Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_.dispatcher, true); - */ + // Super basic + DoDownCount(); + B((const void *)asm_.dispatcher); } + #define _RS ((op>>21) & 0x1F) #define _RT ((op>>16) & 0x1F) #define _RD ((op>>11) & 0x1F) diff --git a/Core/MIPS/ARM/Jit.h b/Core/MIPS/ARM/Jit.h index fe81735e41..4c9c0f695b 100644 --- a/Core/MIPS/ARM/Jit.h +++ b/Core/MIPS/ARM/Jit.h @@ -98,15 +98,16 @@ private: void ClearCache(); void FlushAll(); + void DoDownCount(); + void WriteExit(u32 destination, int exit_num); - void WriteExitDestInEAX(); -// void WriteRfiExitDestInEAX(); + void WriteExitDestInR(ARMReg Reg); void WriteSyscallExit(); // Utility compilation functions - //void BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely); - //void BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool likely); - //void BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely); + void BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely); + void BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool likely); + void BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely); // Utilities to reduce duplicated code /* @@ -122,8 +123,8 @@ private: JitOptions jo; JitState js; - GPRRegCache gpr; - FPURegCache fpr; + ArmRegCache gpr; + // FPURegCache fpr; AsmRoutineManager asm_; diff --git a/Core/MIPS/ARM/RegCache.cpp b/Core/MIPS/ARM/RegCache.cpp index e562f78411..1a6c966298 100644 --- a/Core/MIPS/ARM/RegCache.cpp +++ b/Core/MIPS/ARM/RegCache.cpp @@ -2,11 +2,11 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. @@ -15,395 +15,171 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "../MIPS.h" -#include "../MIPSTables.h" -#include "../MIPSAnalyst.h" -#include "Jit.h" -#include "Asm.h" #include "RegCache.h" -#include "CommonFuncs.h" +#include "ArmEmitter.h" using namespace ArmGen; -RegCache::RegCache() : emit(0) { - memset(locks, 0, sizeof(locks)); - memset(xlocks, 0, sizeof(xlocks)); - memset(saved_locks, 0, sizeof(saved_locks)); - memset(saved_xlocks, 0, sizeof(saved_xlocks)); - memset(regs, 0, sizeof(regs)); - memset(xregs, 0, sizeof(xregs)); - memset(saved_regs, 0, sizeof(saved_regs)); - memset(saved_xregs, 0, sizeof(saved_xregs)); +ArmRegCache::ArmRegCache(MIPSState *mips) + : mips_(mips) +{ + emit = 0; + } -static const int allocationOrder[] = +void ArmRegCache::Init(ARMXEmitter *emitter) { - R2, R3, R4, R5, R6, R7, R8, R10, R11 // omitting R9? -}; - -void RegCache::Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) -{ - for (int i = 0; i < NUMARMREGS; i++) + emit = emitter; + ARMReg *PPCRegs = GetMIPSAllocationOrder(NUMMIPSREG); + ARMReg *Regs = GetAllocationOrder(NUMARMREG); + for(int a = 0; a < 32; ++a) { - xregs[i].free = true; - xregs[i].dirty = false; - xlocks[i] = false; + // This gives us the memory locations of the gpr registers so we can + // load them. 
+ regs[a].location = (u8*)&mips_->r[a]; } - for (int i = 0; i < 32; i++) + for(int a = 0; a < NUMMIPSREG; ++a) { - regs[i].location = GetDefaultLocation(i); - regs[i].away = false; + ArmCRegs[a].MIPSReg = 33; + ArmCRegs[a].Reg = PPCRegs[a]; + ArmCRegs[a].LastLoad = 0; } - - // todo: sort to find the most popular regs - /* - int maxPreload = 2; - for (int i = 0; i < 32; i++) + for(int a = 0; a < NUMARMREG; ++a) { - if (stats.numReads[i] > 2 || stats.numWrites[i] >= 2) - { - LoadToX64(i, true, false); //stats.firstRead[i] <= stats.firstWrite[i], false); - maxPreload--; - if (!maxPreload) - break; - } - }*/ - //Find top regs - preload them (load bursts ain't bad) - //But only preload IF written OR reads >= 3 -} - -// these are powerpc reg indices -void RegCache::Lock(int p1, int p2, int p3, int p4) -{ - locks[p1] = true; - if (p2 != 0xFF) locks[p2] = true; - if (p3 != 0xFF) locks[p3] = true; - if (p4 != 0xFF) locks[p4] = true; -} - -// these are x64 reg indices -void RegCache::LockX(int x1, int x2, int x3, int x4) -{ - if (xlocks[x1]) { - PanicAlert("RegCache: x %i already locked!", x1); + ArmRegs[a].Reg = Regs[a]; + ArmRegs[a].free = true; } - xlocks[x1] = true; - if (x2 != 0xFF) xlocks[x2] = true; - if (x3 != 0xFF) xlocks[x3] = true; - if (x4 != 0xFF) xlocks[x4] = true; } -bool RegCache::IsFreeX(int xreg) const +void ArmRegCache::Start(MIPSAnalyst::AnalysisResults &stats) { - return xregs[xreg].free && !xlocks[xreg]; -} - -void RegCache::UnlockAll() -{ - for (int i = 0; i < 32; i++) - locks[i] = false; -} - -void RegCache::UnlockAllX() -{ - for (int i = 0; i < NUMARMREGS; i++) - xlocks[i] = false; -} - -ARMReg RegCache::GetFreeXReg() -{ - int aCount; - const int *aOrder = GetAllocationOrder(aCount); - for (int i = 0; i < aCount; i++) + for(int a = 0; a < NUMMIPSREG; ++a) { - ARMReg xr = (ARMReg)aOrder[i]; - if (!xlocks[xr] && xregs[xr].free) - { - return (ARMReg)xr; - } + ArmCRegs[a].MIPSReg = 33; + ArmCRegs[a].LastLoad = 0; } - //Okay, not found :( Force grab one +} - //TODO - add a pass to grab xregs whose ppcreg is not used in the next 3 instructions - for (int i = 0; i < aCount; i++) +ARMReg *ArmRegCache::GetMIPSAllocationOrder(int &count) +{ + // This will return us the allocation order of the registers we can use on + // the MIPS side. + static ARMReg allocationOrder[] = { - ARMReg xr = (ARMReg)aOrder[i]; - if (xlocks[xr]) - continue; - int preg = xregs[xr].ppcReg; - if (!locks[preg]) - { - StoreFromRegister(preg); - return xr; - } - } - //Still no dice? Die! - _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs"); - return (ARMReg) -1; + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9 + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; } - -void RegCache::SaveState() +ARMReg *ArmRegCache::GetAllocationOrder(int &count) { - memcpy(saved_locks, locks, sizeof(locks)); - memcpy(saved_xlocks, xlocks, sizeof(xlocks)); - memcpy(saved_regs, regs, sizeof(regs)); - memcpy(saved_xregs, xregs, sizeof(xregs)); -} - -void RegCache::LoadState() -{ - memcpy(xlocks, saved_xlocks, sizeof(xlocks)); - memcpy(locks, saved_locks, sizeof(locks)); - memcpy(regs, saved_regs, sizeof(regs)); - memcpy(xregs, saved_xregs, sizeof(xregs)); -} - -void RegCache::FlushR(ARMReg reg) -{ - if (reg >= NUMARMREGS) - PanicAlert("Flushing non existent reg"); - if (!xregs[reg].free) + // This will return us the allocation order of the registers we can use on + // the host side. 
+ static ARMReg allocationOrder[] = { - StoreFromRegister(xregs[reg].ppcReg); - } -} - -int RegCache::SanityCheck() const -{ - for (int i = 0; i < 32; i++) { - if (regs[i].away) { - if (regs[i].location.IsSimpleReg()) { - ARMReg simple = regs[i].location.GetSimpleReg(); - if (xlocks[simple]) - return 1; - if (xregs[simple].ppcReg != i) - return 2; - } - else if (regs[i].location.IsImm()) - return 3; - } - } - return 0; -} - -void RegCache::DiscardRegContentsIfCached(int preg) -{ - if (regs[preg].away && regs[preg].location.IsSimpleReg()) - { - ARMReg xr = regs[preg].location.GetSimpleReg(); - xregs[xr].free = true; - xregs[xr].dirty = false; - xregs[xr].ppcReg = -1; - regs[preg].away = false; - regs[preg].location = GetDefaultLocation(preg); - } -} - - -void GPRRegCache::SetImmediate32(int preg, u32 immValue) -{ - //if (regs[preg].away == true && regs[preg].location.IsImm() && regs[preg].location.offset == immValue) - // return; - DiscardRegContentsIfCached(preg); - regs[preg].away = true; - regs[preg].location.SetImm32(immValue); -} - -void GPRRegCache::Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) -{ - RegCache::Start(mips, stats); -} - -void FPURegCache::Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) -{ - RegCache::Start(mips, stats); -} - -const int *GPRRegCache::GetAllocationOrder(int &count) -{ + R14, R12, R11, R10 + }; count = sizeof(allocationOrder) / sizeof(const int); return allocationOrder; } -const int *FPURegCache::GetAllocationOrder(int &count) +ARMReg ArmRegCache::GetReg(bool AutoLock) { - static const int allocationOrder[] = + for (int a = 0; a < NUMARMREG; ++a) { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - }; - count = sizeof(allocationOrder) / sizeof(int); - return allocationOrder; -} - -Location GPRRegCache::GetDefaultLocation(int reg) const -{ - Location loc; - loc.SetM(&mips->r[reg]); - return loc; -} - -Location FPURegCache::GetDefaultLocation(int reg) const -{ - Location loc; - loc.SetM(&mips->f[reg]); - return loc; -} - -void RegCache::KillImmediate(int preg, bool doLoad, bool makeDirty) -{ - if (regs[preg].away) - { - if (regs[preg].location.IsImm()) - BindToRegister(preg, doLoad, makeDirty); - else if (regs[preg].location.IsSimpleReg()) - xregs[RX(preg)].dirty |= makeDirty; - } -} - -void GPRRegCache::BindToRegister(int i, bool doLoad, bool makeDirty) -{ - if (!regs[i].away && regs[i].location.IsImm()) - PanicAlert("Bad immediate"); - - if (!regs[i].away || (regs[i].away && regs[i].location.IsImm())) - { - ARMReg xr = GetFreeXReg(); - if (xregs[xr].dirty) PanicAlert("Xreg already dirty"); - if (xlocks[xr]) PanicAlert("GetFreeXReg returned locked register"); - xregs[xr].free = false; - xregs[xr].ppcReg = i; - xregs[xr].dirty = makeDirty || regs[i].location.IsImm(); - Location newloc; - newloc.SetReg(xr); - - //if (doLoad) - // emit->MOV(32, newloc, regs[i].location); - for (int j = 0; j < 32; j++) + if(ArmRegs[a].free) { - if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr) - { - PanicAlert("bad"); - } + // Alright, this one is free + if (AutoLock) + ArmRegs[a].free = false; + return ArmRegs[a].Reg; } - regs[i].away = true; - regs[i].location = newloc; } - else + // Uh Oh, we have all them locked.... + _assert_msg_(JIT, false, "All available registers are locked dumb dumb"); +} +void ArmRegCache::Lock(ARMReg Reg) +{ + for (int RegNum = 0; RegNum < NUMARMREG; ++RegNum) { - // reg location must be simplereg; memory locations - // and immediates are taken care of above. 
- xregs[RX(i)].dirty |= makeDirty; + if(ArmRegs[RegNum].Reg == Reg) + { + _assert_msg_(JIT, ArmRegs[RegNum].free, "This register is already locked"); + ArmRegs[RegNum].free = false; + } } - if (xlocks[RX(i)]) { - PanicAlert("Seriously WTF, this reg should have been flushed"); + _assert_msg_(JIT, false, "Register %d can't be used with lock", Reg); +} +void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3) +{ + for (int RegNum = 0; RegNum < NUMARMREG; ++RegNum) + { + if(ArmRegs[RegNum].Reg == R0) + { + _assert_msg_(JIT, !ArmRegs[RegNum].free, "This register is already unlocked"); + ArmRegs[RegNum].free = true; + } + if( R1 != INVALID_REG && ArmRegs[RegNum].Reg == R1) ArmRegs[RegNum].free = true; + if( R2 != INVALID_REG && ArmRegs[RegNum].Reg == R2) ArmRegs[RegNum].free = true; + if( R3 != INVALID_REG && ArmRegs[RegNum].Reg == R3) ArmRegs[RegNum].free = true; } } -void GPRRegCache::StoreFromRegister(int i) +ARMReg ArmRegCache::R(int preg) { - if (regs[i].away) - { - bool doStore; - if (regs[i].location.IsSimpleReg()) + u32 HighestUsed = 0; + u8 Num = 0; + for (int a = 0; a < NUMMIPSREG; ++a){ + ++ArmCRegs[a].LastLoad; + if (ArmCRegs[a].LastLoad > HighestUsed) { - ARMReg xr = RX(i); - xregs[xr].free = true; - xregs[xr].ppcReg = -1; - doStore = xregs[xr].dirty; - xregs[xr].dirty = false; + HighestUsed = ArmCRegs[a].LastLoad; + Num = a; } - else - { - //must be immediate - do nothing - doStore = true; - } - Location newLoc = GetDefaultLocation(i); - if (doStore) - ; - // emit->MOV(32, newLoc, regs[i].location); - regs[i].location = newLoc; - regs[i].away = false; } + // Check if already Loaded + for (int a = 0; a < NUMMIPSREG; ++a) { + if (ArmCRegs[a].MIPSReg == preg) + { + ArmCRegs[a].LastLoad = 0; + return ArmCRegs[a].Reg; + } + } + // Check if we have a free register + for (u8 a = 0; a < NUMMIPSREG; ++a) + if (ArmCRegs[a].MIPSReg == 33) + { + emit->ARMABI_MOVI2R(ArmCRegs[a].Reg, (u32)&mips_->r); + emit->LDR(ArmCRegs[a].Reg, ArmCRegs[a].Reg, preg * 4); + ArmCRegs[a].MIPSReg = preg; + ArmCRegs[a].LastLoad = 0; + return ArmCRegs[a].Reg; + } + // Alright, we couldn't get a free space, dump that least used register + // Note that this is incredibly dangerous if references to the register + // are still floating around out there! + ARMReg rA = GetReg(false); + emit->ARMABI_MOVI2R(rA, (u32)&mips_->r); + emit->STR(rA, ArmCRegs[Num].Reg, ArmCRegs[Num].MIPSReg * 4); + emit->LDR(ArmCRegs[Num].Reg, rA, preg * 4); + ArmCRegs[Num].MIPSReg = preg; + ArmCRegs[Num].LastLoad = 0; + return ArmCRegs[Num].Reg; } -void FPURegCache::BindToRegister(int i, bool doLoad, bool makeDirty) +void ArmRegCache::Flush() { - _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm"); - if (!regs[i].away) - { - // Reg is at home in the memory register file. Let's pull it out. - ARMReg xr = GetFreeXReg(); - _assert_msg_(DYNA_REC, xr < NUMARMREGS, "WTF - load - invalid reg"); - xregs[xr].ppcReg = i; - xregs[xr].free = false; - xregs[xr].dirty = makeDirty; - Location newloc; - newloc.SetReg(xr); - if (doLoad) - { - //if (!regs[i].location.IsImm() && (regs[i].location.offset & 0xF)) - //{ - // PanicAlert("WARNING - misaligned fp register location %i", i); - //} - //emit->MOVAPD(xr, regs[i].location); - } - regs[i].location = newloc; - regs[i].away = true; - } else { - // There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary. - xregs[RX(i)].dirty |= makeDirty; - } -} + // Maybe we should keep this pointer around permanently? 
+ emit->MOVW(R14, (u32)&mips_->r); + emit->MOVT(R14, (u32)&mips_->r, true); -void FPURegCache::StoreFromRegister(int i) -{ - _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - store - imm"); - if (regs[i].away) - { - ARMReg xr = regs[i].location.GetSimpleReg(); - _assert_msg_(DYNA_REC, xr < NUMARMREGS, "WTF - store - invalid reg"); - xregs[xr].free = true; - xregs[xr].dirty = false; - xregs[xr].ppcReg = -1; - Location newLoc = GetDefaultLocation(i); - // emit->MOVAPD(newLoc, xr); - regs[i].location = newLoc; - regs[i].away = false; - } - else - { - // _assert_msg_(DYNA_REC,0,"already stored"); - } -} - -void RegCache::Flush(FlushMode mode) -{ - for (int i = 0; i < NUMARMREGS; i++) { - if (xlocks[i]) - PanicAlert("Someone forgot to unlock X64 reg %i.", i); - } - for (int i = 0; i < 32; i++) - { - if (locks[i]) + for (int a = 0; a < NUMMIPSREG; ++a) { + if (ArmCRegs[a].MIPSReg != 33) { - PanicAlert("Somebody forgot to unlock PPC reg %i.", i); - } - if (regs[i].away) - { - if (regs[i].location.IsSimpleReg()) - { - ARMReg xr = RX(i); - StoreFromRegister(i); - xregs[xr].dirty = false; - } - else if (regs[i].location.IsImm()) - { - StoreFromRegister(i); - } - else - { - _assert_msg_(DYNA_REC,0,"Jit64 - Flush unhandled case, reg %i PC: %08x", i, mips->pc); - } + emit->STR(R14, ArmCRegs[a].Reg, ArmCRegs[a].MIPSReg * 4); + ArmCRegs[a].MIPSReg = 33; + ArmCRegs[a].LastLoad = 0; } } } + diff --git a/Core/MIPS/ARM/RegCache.h b/Core/MIPS/ARM/RegCache.h index afab156b52..ab5d85c0ca 100644 --- a/Core/MIPS/ARM/RegCache.h +++ b/Core/MIPS/ARM/RegCache.h @@ -2,11 +2,11 @@ // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. +// the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. @@ -15,156 +15,82 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#pragma once +#ifndef _JITARMREGCACHE_H +#define _JITARMREGCACHE_H +#include "ArmEmitter.h" +#include "../MIPS.h" #include "../MIPSAnalyst.h" -#include +#include "ArmABI.h" using namespace ArmGen; -enum FlushMode -{ - FLUSH_ALL -}; -enum GrabMode -{ - M_READ = 1, - M_WRITE = 2, - M_READWRITE = 3, -}; - -enum Loc { - LOC_IMM, - LOC_REG, - LOC_MEM -}; - -struct Location -{ - Loc loc; - bool IsSimpleReg() const {return loc == LOC_REG;} - ARMReg GetSimpleReg() const {return reg;} - bool IsImm() const { return loc == LOC_IMM; } - void SetImm32(u32 i) {loc = LOC_IMM; imm = i;} - void SetM(void *p) {loc = LOC_MEM; ptr = (u32 *)p;} - void SetReg(ARMReg r) {loc = LOC_REG; reg = r;} - - union { - u32 *ptr; - ARMReg reg; - u32 imm; - }; -}; +// This ARM Register cache actually pre loads the most used registers before +// the block to increase speed since every memory load requires two +// instructions to load it. We are going to use R0-RMAX as registers for the +// use of MIPS Registers. +// Allocation order as follows +#define ARMREGS 16 +// Allocate R0 to R9 for MIPS first. 
+// For General registers on the host side, start with R14 and go down as we go +// R13 is reserved for our stack pointer, don't ever use that. Unless you save +// it +// So we have R14, R12, R11, R10 to work with instructions struct MIPSCachedReg { - Location location; - bool away; // value not in source register + const u8 *location; }; -struct ARMCachedReg +struct JRCPPC { - int ppcReg; - bool dirty; + u32 MIPSReg; // Tied to which MIPS Register + ARMReg Reg; // Tied to which ARM Register + u32 LastLoad; +}; +struct JRCReg +{ + ARMReg Reg; // Which reg this is. bool free; }; - -typedef int XReg; -typedef int PReg; - -#define NUMARMREGS 15 - -class RegCache +class ArmRegCache { private: - bool locks[32]; - bool saved_locks[32]; - bool saved_xlocks[NUMARMREGS]; + MIPSCachedReg regs[32]; + JRCPPC ArmCRegs[ARMREGS]; + JRCReg ArmRegs[ARMREGS]; // Four registers remaining + + int NUMMIPSREG; // + LO, HI, ... + int NUMARMREG; + + ARMReg *GetAllocationOrder(int &count); + ARMReg *GetMIPSAllocationOrder(int &count); + + MIPSState *mips_; protected: - bool xlocks[NUMARMREGS]; - MIPSCachedReg regs[32]; - ARMCachedReg xregs[NUMARMREGS]; - - MIPSCachedReg saved_regs[32]; - ARMCachedReg saved_xregs[NUMARMREGS]; - - virtual const int *GetAllocationOrder(int &count) = 0; - ARMXEmitter *emit; public: - MIPSState *mips; - RegCache(); + ArmRegCache(MIPSState *mips); + ~ArmRegCache() {} - virtual ~RegCache() {} - virtual void Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) = 0; + void Init(ARMXEmitter *emitter); + void Start(MIPSAnalyst::AnalysisResults &stats); - void DiscardRegContentsIfCached(int preg); void SetEmitter(ARMXEmitter *emitter) {emit = emitter;} - void FlushR(ARMReg reg); - void FlushR(ARMReg reg, ARMReg reg2) {FlushR(reg); FlushR(reg2);} - void FlushLockX(ARMReg reg) { - FlushR(reg); - LockX(reg); - } - void FlushLockX(ARMReg reg1, ARMReg reg2) { - FlushR(reg1); FlushR(reg2); - LockX(reg1); LockX(reg2); - } - virtual void Flush(FlushMode mode); - // virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);} - int SanityCheck() const; - void KillImmediate(int preg, bool doLoad, bool makeDirty); + // TODO: Add a way to lock MIPS registers so they aren't kicked out when you don't expect it. - //TODO - instead of doload, use "read", "write" - //read only will not set dirty flag - virtual void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true) = 0; - virtual void StoreFromRegister(int preg) = 0; + ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. + void Lock(ARMReg R0); + void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = INVALID_REG); + void Flush(); + ARMReg R(int preg); // Returns a cached register - const Location &R(int preg) const {return regs[preg].location;} - ARMReg RX(int preg) const - { - if (regs[preg].away && regs[preg].location.IsSimpleReg()) - return regs[preg].location.GetSimpleReg(); - PanicAlert("Not so simple - %i", preg); - return (ARMReg)-1; - } - virtual Location GetDefaultLocation(int reg) const = 0; - - // Register locking. A locked registers will not be spilled when trying to find a new free register. 
- void Lock(int p1, int p2=0xff, int p3=0xff, int p4=0xff); - void LockX(int x1, int x2=0xff, int x3=0xff, int x4=0xff); - void UnlockAll(); - void UnlockAllX(); - - bool IsFreeX(int xreg) const; - - ARMReg GetFreeXReg(); - - void SaveState(); - void LoadState(); -}; - -class GPRRegCache : public RegCache -{ -public: - void Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats); - void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true); - void StoreFromRegister(int preg); - Location GetDefaultLocation(int reg) const; - const int *GetAllocationOrder(int &count); - void SetImmediate32(int preg, u32 immValue); }; -class FPURegCache : public RegCache -{ -public: - void Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats); - void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true); - void StoreFromRegister(int preg); - const int *GetAllocationOrder(int &count); - Location GetDefaultLocation(int reg) const; -}; + + +#endif diff --git a/Windows/PPSSPP.vcxproj b/Windows/PPSSPP.vcxproj index 8809408ade..0f86ef108e 100644 --- a/Windows/PPSSPP.vcxproj +++ b/Windows/PPSSPP.vcxproj @@ -212,6 +212,12 @@ + + true + true + true + true + true true @@ -290,6 +296,7 @@ + @@ -322,6 +329,7 @@ + @@ -359,4 +367,4 @@ - + \ No newline at end of file diff --git a/Windows/PPSSPP.vcxproj.filters b/Windows/PPSSPP.vcxproj.filters index 5786358b73..ba3dce5c47 100644 --- a/Windows/PPSSPP.vcxproj.filters +++ b/Windows/PPSSPP.vcxproj.filters @@ -104,6 +104,9 @@ + + Android + @@ -182,6 +185,9 @@ Windows\Input + + Android + @@ -203,6 +209,7 @@ Android + diff --git a/android/jni/ARMEmitterTest.h b/android/jni/ARMEmitterTest.h new file mode 100644 index 0000000000..4d2100ce61 --- /dev/null +++ b/android/jni/ARMEmitterTest.h @@ -0,0 +1,5 @@ +#pragma once + +// Just a test of the ARM emitter, playing around with running some code without having the whole emu around. + +void ArmEmitterTest(); \ No newline at end of file diff --git a/android/jni/Android.mk b/android/jni/Android.mk index a98a774767..8f61ef704a 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -46,6 +46,7 @@ LOCAL_SRC_FILES := \ MenuScreens.cpp \ UIShader.cpp \ GamepadEmu.cpp \ + ArmEmitterTest.cpp \ ui_atlas.cpp \ $(SRC)/native/android/app-android.cpp \ $(SRC)/ext/libkirk/AES.c \ @@ -55,6 +56,7 @@ LOCAL_SRC_FILES := \ $(SRC)/ext/libkirk/kirk_engine.c \ $(SRC)/Common/ArmABI.cpp \ $(SRC)/Common/ArmEmitter.cpp \ + $(SRC)/Common/ArmCPUDetect.cpp \ $(SRC)/Common/LogManager.cpp \ $(SRC)/Common/MemArena.cpp \ $(SRC)/Common/MemoryUtil.cpp \ diff --git a/android/jni/ArmEmitterTest.cpp b/android/jni/ArmEmitterTest.cpp new file mode 100644 index 0000000000..0bdf26af19 --- /dev/null +++ b/android/jni/ArmEmitterTest.cpp @@ -0,0 +1,75 @@ +#include "base/logging.h" +#include "ARMEmitterTest.h" + +#include "Common/ArmABI.h" +#include "Common/ArmEmitter.h" + +static bool functionWasCalled; + +using namespace ArmGen; + +class TestCode : public ArmGen::ARMXCodeBlock { +public: + TestCode(); + void Generate(); + const u8 *testCodePtr; + const u8 *testCodePtr2; +}; + +TestCode::TestCode() +{ + AllocCodeSpace(0x10000); +} + + +u32 TestLeaf(u32 a, u32 b, u32 c) +{ + ILOG("TestLeaf: %08x %08x %08x\n", a, b, c); + return 0xFF; +} + +void TestLeaf2(u32 a) +{ + ILOG("TestLeaf2 %08x\n"); +} + + +void TestCode::Generate() +{ + testCodePtr = this->GetCodePtr(); + // Sonic1 commented that R11 is the frame pointer in debug mode, whatever "debug mode" means. 
+ PUSH(2, R11, _LR); + ARMABI_MOVI2R(R0, 0x13371338); + ARMABI_MOVI2R(R1, 0x1337); + ARMABI_CallFunction((void*)&TestLeaf2); + + //ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x1, 0x100, 0x1337); + //ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x2, 0x100, 0x31337); + //ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x3, 0x100, 0x1337); + POP(2, R11, _PC); // Yup, this is how you return. + + testCodePtr2 = this->GetCodePtr(); + PUSH(2, R11, _LR); + ARMABI_PushAllCalleeSavedRegsAndAdjustStack(); + ARMABI_CallFunction((void*)&TestLeaf2); + ARMABI_PopAllCalleeSavedRegsAndAdjustStack(); + POP(2, R11, _PC); +} + + +void CallPtr(const void *ptr) +{ + ((void(*)())ptr)(); +} + + + +void ArmEmitterTest() +{ + TestCode gen; + gen.ReserveCodeSpace(0x4000); + gen.Generate(); + + CallPtr(gen.testCodePtr); + ILOG("ARM emitter test 1 passed!"); +} \ No newline at end of file diff --git a/android/jni/NativeApp.cpp b/android/jni/NativeApp.cpp index 66d27ae616..0f13678908 100644 --- a/android/jni/NativeApp.cpp +++ b/android/jni/NativeApp.cpp @@ -47,6 +47,8 @@ #include "MenuScreens.h" #include "UIShader.h" +#include "ARMEmitterTest.h" + Texture *uiTexture; ScreenManager *screenManager; @@ -145,6 +147,8 @@ void NativeGetAppInfo(std::string *app_dir_name, std::string *app_nice_name, boo *app_nice_name = "PPSSPP"; *app_dir_name = "ppsspp"; *landscape = true; + + ArmEmitterTest(); } void NativeInit(int argc, const char *argv[], const char *savegame_directory, const char *external_directory, const char *installID)
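The patch adds a three-argument ABI helper (ARMABI_CallFunctionCCC) but only exercises it through the commented-out calls in TestCode::Generate(). Below is a minimal sketch, not part of the patch, of how that helper could be covered by the same harness; TestLeaf3, g_leafResult and GenerateCCC are hypothetical names, and the expected value 0x1438 is just 0x1 + 0x100 + 0x1337. Because the helper restores R0-R3 after the call, the callee's return value in R0 is not preserved, so the sketch records the result in a global instead.

// Hedged sketch: extends the ArmEmitterTest harness above; assumes the emitter
// interfaces added by this patch (ARMXCodeBlock, PUSH/POP, _LR/_PC,
// ARMABI_CallFunctionCCC) behave as defined there.
static u32 g_leafResult;                  // hypothetical: records what the leaf saw

static u32 TestLeaf3(u32 a, u32 b, u32 c)
{
	g_leafResult = a + b + c;             // cheap way to observe all three arguments
	return g_leafResult;
}

void TestCode::GenerateCCC()              // hypothetical extra test entry point, declared alongside Generate()
{
	testCodePtr2 = GetCodePtr();
	PUSH(2, R11, _LR);
	// The helper saves and restores R0-R3 and LR itself, so nothing else needs preserving.
	ARMABI_CallFunctionCCC((void *)&TestLeaf3, 0x1, 0x100, 0x1337);
	POP(2, R11, _PC);                     // return to whoever called the generated code
}

// Usage, mirroring ArmEmitterTest():
//   TestCode gen; gen.GenerateCCC(); CallPtr(gen.testCodePtr2);
//   ILOG("CCC test %s", g_leafResult == 0x1438 ? "passed" : "failed");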
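The new ArmRegCache maps guest MIPS registers onto host registers R0-R9 on demand: R(preg) returns an ARM register holding MIPS register preg, loading it from mips_->r[] if needed and spilling the least-recently-used mapping when all of its slots are taken, while GetReg()/Unlock() hand out scratch registers from the separate R14/R12/R11/R10 pool. A hedged sketch of the intended usage from a compile routine follows; Comp_AdduExample is a hypothetical helper, and note the cache's own TODO that a mapping returned by R() can still be evicted by a later R() call, so holding on to one across further lookups is unsafe.

// Sketch only: mirrors how CompALU.cpp and DoDownCount() use the cache in this patch.
void Jit::Comp_AdduExample(u32 op)        // hypothetical, not part of the patch
{
	int rt = _RT, rs = _RS, rd = _RD;

	// Each gpr.R(n) may emit a load (or a spill of the least-recently-used
	// guest register) before returning the ARM register that now holds n.
	ADD(gpr.R(rd), gpr.R(rs), gpr.R(rt));

	// Scratch registers come from the host pool and must be handed back.
	ARMReg temp = gpr.GetReg();
	ARMABI_MOVI2R(temp, (u32)&mips_->pc);
	gpr.Unlock(temp);

	// gpr.Flush() later stores every live mapping back to mips_->r[].
}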