From f75d14d3b500be1e8426a37a95ee57edf6320f84 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 10 Feb 2013 15:53:56 +0100 Subject: [PATCH] ARM FPU jit work --- Common/ArmEmitter.cpp | 43 +++++++++++- Common/ArmEmitter.h | 1 + Core/MIPS/ARM/ArmCompFPU.cpp | 109 ++++++++++++------------------- Core/MIPS/ARM/ArmJit.cpp | 7 +- Core/MIPS/ARM/ArmJit.h | 4 +- Core/MIPS/ARM/ArmRegCache.cpp | 10 +-- Core/MIPS/ARM/ArmRegCache.h | 2 +- Core/MIPS/ARM/ArmRegCacheFPU.cpp | 39 +++++------ Core/MIPS/ARM/ArmRegCacheFPU.h | 8 ++- Core/MIPS/x86/CompFPU.cpp | 59 +++++++++++++++++ ext/disarm.cpp | 4 +- 11 files changed, 180 insertions(+), 106 deletions(-) diff --git a/Common/ArmEmitter.cpp b/Common/ArmEmitter.cpp index ec3e7655be..27d73a114c 100644 --- a/Common/ArmEmitter.cpp +++ b/Common/ArmEmitter.cpp @@ -617,6 +617,7 @@ ARMReg ARMXEmitter::SubBase(ARMReg Reg) } return Reg; } + // NEON Specific void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { @@ -646,7 +647,6 @@ void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \ | ((Vm & 0x10) << 2) | (Vm & 0xF)); - } // VFP Specific @@ -655,7 +655,7 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, Operand2 op) { _assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); - _assert_msg_(DYNA_REC, !(op.Imm12() & 4), "Offset needs to be word aligned"); + _assert_msg_(DYNA_REC, !(op.Imm12() & 3), "VLDR: Offset needs to be word aligned"); bool single_reg = Dest < D0; Dest = SubBase(Dest); @@ -676,7 +676,7 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, Operand2 op) { _assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR"); _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR"); - 
_assert_msg_(DYNA_REC, !(op.Imm12() & 4), "Offset needs to be word aligned"); + _assert_msg_(DYNA_REC, !(op.Imm12() & 3), "VSTR: Offset needs to be word aligned"); bool single_reg = Src < D0; Src = SubBase(Src); @@ -774,6 +774,7 @@ void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm) | ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } } + // VFP and ASIMD void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm) { @@ -845,6 +846,42 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm) } } } +// VFP and ASIMD: Vd = Vn * Vm. Emits the VFP single/double forms; the quad (NEON) form is not emitted yet. +void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VMUL"); + _assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VMUL"); + _assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VMUL"); + bool single_reg = Vd < D0; + bool double_reg = Vd < Q0; + + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \ + | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ + | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + if (double_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \ + | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ + | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 1) | (Vm & 0xF)); + } + else + { + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VMUL with Quad Reg without support!"); + //Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) + // | ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) + // | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } + } +} void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) { diff --git a/Common/ArmEmitter.h b/Common/ArmEmitter.h index 636a9a646b..1ae59ff99e 100644 --- a/Common/ArmEmitter.h +++ b/Common/ArmEmitter.h @@ -510,6 +510,7 @@ public: // NEON and VFP void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMUL(ARMReg 
Vd, ARMReg Vn, ARMReg Vm); void VMOV(ARMReg Dest, ARMReg Src); void QuickCallFunction(ARMReg scratchreg, void *func); diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 9b3dee1bc4..a1aa60bcf3 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -28,46 +28,26 @@ #define _POS ((op>>6 ) & 0x1F) #define _SIZE ((op>>11 ) & 0x1F) -#define OLDD Comp_Generic(op); return; +#define DISABLE Comp_Generic(op); return; +#define CONDITIONAL_DISABLE ; namespace MIPSComp { - /* -void Jit::CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters) -{ - int ft = _FT; - int fs = _FS; - int fd = _FD; - fpr.Lock(ft, fs, fd); - - if (false && fs == fd) - { - fpr.BindToRegister(fd, true, true); - (this->*arith)(fpr.RX(fd), fpr.R(ft)); - } - else - { - MOVSS(XMM0, fpr.R(fs)); - MOVSS(XMM1, fpr.R(ft)); - fpr.BindToRegister(fd, true, true); - (this->*arith)(XMM0, R(XMM1)); - MOVSS(fpr.RX(fd), R(XMM0)); - } - fpr.UnlockAll(); -} -*/ - - void Jit::Comp_FPU3op(u32 op) { - OLDD + DISABLE + + int ft = _FT; + int fs = _FS; + int fd = _FD; + fpr.MapDirtyInIn(fd, fs, ft); switch (op & 0x3f) { - //case 0: CompFPTriArith(op, &XEmitter::ADDSS, false); break; //F(fd) = F(fs) + F(ft); //add - //case 1: CompFPTriArith(op, &XEmitter::SUBSS, true); break; //F(fd) = F(fs) - F(ft); //sub - //case 2: CompFPTriArith(op, &XEmitter::MULSS, false); break; //F(fd) = F(fs) * F(ft); //mul - //case 3: CompFPTriArith(op, &XEmitter::DIVSS, true); break; //F(fd) = F(fs) / F(ft); //div + case 0: VADD(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) + F(ft); //add + case 1: VSUB(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) - F(ft); //sub + case 2: VMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) * F(ft); //mul + case 3: VDIV(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) / F(ft); //div default: Comp_Generic(op); return; @@ -76,7 +56,7 @@ void Jit::Comp_FPU3op(u32 op) void Jit::Comp_FPULS(u32 op) { - 
OLDD + DISABLE s32 offset = (s16)(op&0xFFFF); int ft = ((op>>16)&0x1f); @@ -85,28 +65,18 @@ void Jit::Comp_FPULS(u32 op) switch(op >> 26) { - /* case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1 - gpr.Lock(rs); - fpr.Lock(ft); - fpr.BindToRegister(ft, false, true); - MOV(32, R(EAX), gpr.R(rs)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVSS(fpr.RX(ft), MDisp(EAX, (u32)Memory::base + offset)); - gpr.UnlockAll(); - fpr.UnlockAll(); + fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY); + SetR0ToEffectiveAddress(rs, offset); + VLDR(fpr.R(ft), R0, 0); break; + case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1 - gpr.Lock(rs); - fpr.Lock(ft); - fpr.BindToRegister(ft, true, false); - MOV(32, R(EAX), gpr.R(rs)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVSS(MDisp(EAX, (u32)Memory::base + offset), fpr.RX(ft)); - gpr.UnlockAll(); - fpr.UnlockAll(); + fpr.MapReg(ft, 0); + SetR0ToEffectiveAddress(rs, offset); + VSTR(fpr.R(ft), R0, 0); break; - */ + default: Comp_Generic(op); return; @@ -115,44 +85,45 @@ void Jit::Comp_FPULS(u32 op) void Jit::Comp_FPU2op(u32 op) { - OLDD + DISABLE + int fs = _FS; int fd = _FD; switch (op & 0x3f) { - /* + /* case 5: //F(fd) = fabsf(F(fs)); break; //abs fpr.Lock(fd, fs); fpr.BindToRegister(fd, fd == fs, true); - MOVSS(fpr.RX(fd), fpr.R(fs)); - PAND(fpr.RX(fd), M((void *)ssNoSignMask)); + MOVSS(fpr.R(fd), fpr.R(fs)); + PAND(fpr.R(fd), M((void *)ssNoSignMask)); fpr.UnlockAll(); break; + */ + + case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt + fpr.MapDirtyIn(fd, fs); + VSQRT(fpr.R(fd), fpr.R(fs)); + return; + case 6: //F(fd) = F(fs); break; //mov - if (fd != fs) { - fpr.Lock(fd, fs); - fpr.BindToRegister(fd, fd == fs, true); - MOVSS(fpr.RX(fd), fpr.R(fs)); - fpr.UnlockAll(); - } + fpr.MapDirtyIn(fd, fs); + VMOV(fpr.R(fd), fpr.R(fs)); break; + /* case 7: //F(fd) = -F(fs); break; //neg fpr.Lock(fd, fs); fpr.BindToRegister(fd, fd == fs, true); - MOVSS(fpr.RX(fd), fpr.R(fs)); - PXOR(fpr.RX(fd), M((void *)ssSignBits2)); + 
MOVSS(fpr.R(fd), fpr.R(fs)); + PXOR(fpr.R(fd), M((void *)ssSignBits2)); fpr.UnlockAll(); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s - case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt - Comp_Generic(op); - return; - case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break;//trunc.w.s fpr.Lock(fs, fd); fpr.StoreFromRegister(fd); @@ -174,7 +145,7 @@ void Jit::Comp_FPU2op(u32 op) void Jit::Comp_mxc1(u32 op) { - OLDD + DISABLE int fs = _FS; int rt = _RT; @@ -199,7 +170,7 @@ void Jit::Comp_mxc1(u32 op) gpr.StoreFromRegister(rt); fpr.Lock(fs); fpr.BindToRegister(fs, false, true); - MOVSS(fpr.RX(fs), gpr.R(rt)); + MOVSS(fpr.R(fs), gpr.R(rt)); fpr.UnlockAll(); return; */ diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index d34cb95f43..a1760d94bf 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -54,11 +54,11 @@ void DisassembleArm(const u8 *data, int size) { namespace MIPSComp { -Jit::Jit(MIPSState *mips) : blocks(mips), gpr(mips), mips_(mips) +Jit::Jit(MIPSState *mips) : blocks(mips), gpr(mips), fpr(mips), mips_(mips) { blocks.Init(); gpr.SetEmitter(this); - //fpr.SetEmitter(this); + fpr.SetEmitter(this); AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards. 
GenerateFixedCode(); } @@ -66,7 +66,7 @@ Jit::Jit(MIPSState *mips) : blocks(mips), gpr(mips), mips_(mips) void Jit::FlushAll() { gpr.FlushAll(); - //fpr.Flush(FLUSH_ALL); + fpr.FlushAll(); } void Jit::ClearCache() @@ -148,6 +148,7 @@ const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b) MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address); gpr.Start(analysis); + fpr.Start(analysis); int numInstructions = 0; int cycles = 0; diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 1172050eaa..b9b2c1752c 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -21,6 +21,7 @@ #include "ArmJitCache.h" #include "ArmRegCache.h" +#include "ArmRegCacheFPU.h" #include "ArmAsm.h" namespace MIPSComp @@ -139,7 +140,6 @@ private: void CompShiftImm(u32 op, void (ARMXEmitter::*shift)(int, OpArg, OpArg)); void CompShiftVar(u32 op, void (XEmitter::*shift)(int, OpArg, OpArg)); - void CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters); */ // Utils @@ -150,7 +150,7 @@ private: ArmJitState js; ArmRegCache gpr; - // FPURegCache fpr; + ArmRegCacheFPU fpr; MIPSState *mips_; diff --git a/Core/MIPS/ARM/ArmRegCache.cpp b/Core/MIPS/ARM/ArmRegCache.cpp index 7841324e22..18815f4e5c 100644 --- a/Core/MIPS/ARM/ArmRegCache.cpp +++ b/Core/MIPS/ARM/ArmRegCache.cpp @@ -50,7 +50,7 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) { // R8 is used to preserve flags in nasty branches. // R9 and upwards are reserved for jit basics. static const ARMReg allocationOrder[] = { - R2, R3, R4, R5, R6, R7 + R12, R2, R3, R4, R5, R6, R7, }; count = sizeof(allocationOrder) / sizeof(const int); return allocationOrder; @@ -67,7 +67,7 @@ ARMReg ArmRegCache::MapReg(MIPSReg mipsReg, int mapFlags) { if (mapFlags & MAP_DIRTY) { ar[mr[mipsReg].reg].isDirty = true; } - return mr[mipsReg].reg; + return (ARMReg)mr[mipsReg].reg; } // Okay, not mapped, so we need to allocate an ARM register. 
@@ -189,11 +189,11 @@ void ArmRegCache::FlushMipsReg(MIPSReg r) { break; case ML_ARMREG: - if (mr[r].reg == INVALID_REG) { + if (mr[r].reg == (int)INVALID_REG) { ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad ArmReg"); } if (ar[mr[r].reg].isDirty) { - emit->STR(CTXREG, mr[r].reg, GetMipsRegOffset(r)); + emit->STR(CTXREG, (ARMReg)mr[r].reg, GetMipsRegOffset(r)); ar[mr[r].reg].isDirty = false; } ar[mr[r].reg].mipsReg = -1; @@ -274,7 +274,7 @@ void ArmRegCache::ReleaseSpillLocks() { ARMReg ArmRegCache::R(int mipsReg) { if (mr[mipsReg].loc == ML_ARMREG) { - return mr[mipsReg].reg; + return (ARMReg)mr[mipsReg].reg; } else { ERROR_LOG(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_); return INVALID_REG; // BAAAD diff --git a/Core/MIPS/ARM/ArmRegCache.h b/Core/MIPS/ARM/ArmRegCache.h index a06d61a32c..e93a3fc781 100644 --- a/Core/MIPS/ARM/ArmRegCache.h +++ b/Core/MIPS/ARM/ArmRegCache.h @@ -53,7 +53,7 @@ struct RegMIPS { RegMIPSLoc loc; // Data (only one of these is used, depending on loc. Could make a union). u32 imm; - ARMReg reg; + int reg; // reg index (need to add S0 to get ARMReg) bool spillLock; // if true, this register cannot be spilled. // If loc == ML_MEM, it's back in its location in the CPU context struct. }; diff --git a/Core/MIPS/ARM/ArmRegCacheFPU.cpp b/Core/MIPS/ARM/ArmRegCacheFPU.cpp index 5261c0dd0c..66af8a1db0 100644 --- a/Core/MIPS/ARM/ArmRegCacheFPU.cpp +++ b/Core/MIPS/ARM/ArmRegCacheFPU.cpp @@ -64,7 +64,7 @@ ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) { if (mapFlags & MAP_DIRTY) { ar[mr[mipsReg].reg].isDirty = true; } - return mr[mipsReg].reg; + return (ARMReg)(mr[mipsReg].reg + S0); } // Okay, not mapped, so we need to allocate an ARM register. @@ -86,8 +86,8 @@ allocate: } ar[reg].mipsReg = mipsReg; mr[mipsReg].loc = ML_ARMREG; - mr[mipsReg].reg = (ARMReg)reg; - return (ARMReg)reg; + mr[mipsReg].reg = reg; + return (ARMReg)(reg + S0); } } @@ -96,7 +96,7 @@ allocate: // TODO: Spill dirty regs first? or opposite? 
int bestToSpill = -1; for (int i = 0; i < allocCount; i++) { - int reg = allocOrder[i]; + int reg = allocOrder[i] - S0; if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock) continue; bestToSpill = reg; @@ -139,38 +139,39 @@ void ArmRegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoid } void ArmRegCacheFPU::FlushArmReg(ARMReg r) { - if (ar[r - S0].mipsReg == -1) { + int reg = r - S0; + if (ar[reg].mipsReg == -1) { // Nothing to do, reg not mapped. return; } - if (ar[r - S0].mipsReg != -1) { - if (ar[r - S0].isDirty && mr[ar[r - S0].mipsReg].loc == ML_ARMREG) - emit->VSTR(CTXREG, r, GetMipsRegOffset(ar[r - S0].mipsReg)); + if (ar[reg].mipsReg != -1) { + if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG) + emit->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg)); // IMMs won't be in an ARM reg. - mr[ar[r - S0].mipsReg].loc = ML_MEM; - mr[ar[r - S0].mipsReg].reg = INVALID_REG; - mr[ar[r - S0].mipsReg].imm = 0; + mr[ar[reg].mipsReg].loc = ML_MEM; + mr[ar[reg].mipsReg].reg = (int)INVALID_REG; + mr[ar[reg].mipsReg].imm = 0; } else { ERROR_LOG(HLE, "Dirty but no mipsreg?"); } - ar[r].isDirty = false; - ar[r].mipsReg = -1; + ar[reg].isDirty = false; + ar[reg].mipsReg = -1; } void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) { switch (mr[r].loc) { case ML_IMM: // IMM is always "dirty". - emit->MOVI2R(R0, mr[r].imm); - emit->STR(CTXREG, R0, GetMipsRegOffset(r)); + // IMM is not allowed for FP (yet). 
+ ERROR_LOG(HLE, "Imm in FP register?"); break; case ML_ARMREG: - if (mr[r].reg == INVALID_REG) { + if (mr[r].reg == (int)INVALID_REG) { ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad ArmReg"); } if (ar[mr[r].reg].isDirty) { - emit->STR(CTXREG, mr[r].reg, GetMipsRegOffset(r)); + emit->VSTR((ARMReg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r)); ar[mr[r].reg].isDirty = false; } ar[mr[r].reg].mipsReg = -1; @@ -185,7 +186,7 @@ void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) { break; } mr[r].loc = ML_MEM; - mr[r].reg = INVALID_REG; + mr[r].reg = (int)INVALID_REG; mr[r].imm = 0; } @@ -249,7 +250,7 @@ void ArmRegCacheFPU::ReleaseSpillLocks() { ARMReg ArmRegCacheFPU::R(int mipsReg) { if (mr[mipsReg].loc == ML_ARMREG) { - return mr[mipsReg].reg; + return (ARMReg)(mr[mipsReg].reg + S0); } else { ERROR_LOG(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_); return INVALID_REG; // BAAAD diff --git a/Core/MIPS/ARM/ArmRegCacheFPU.h b/Core/MIPS/ARM/ArmRegCacheFPU.h index 2ad58cba40..b7a3a953da 100644 --- a/Core/MIPS/ARM/ArmRegCacheFPU.h +++ b/Core/MIPS/ARM/ArmRegCacheFPU.h @@ -21,8 +21,8 @@ #include "../MIPS.h" #include "../MIPSAnalyst.h" -#include "ArmEmitter.h" -#include "ArmRegCache.h" +#include "Common/ArmEmitter.h" +#include "Core/MIPS/ARM/ArmRegCache.h" using namespace ArmGen; @@ -67,6 +67,7 @@ public: // Returns an ARM register containing the requested MIPS register. 
ARMReg MapReg(MIPSReg reg, int mapFlags = 0); void MapInIn(MIPSReg rd, MIPSReg rs); + void MapDirty(MIPSReg rd); void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true); void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true); void FlushArmReg(ARMReg r); @@ -83,6 +84,9 @@ public: private: int GetMipsRegOffset(MIPSReg r); + int GetMipsRegOffsetV(MIPSReg r) { + return GetMipsRegOffset(r + 32); + } MIPSState *mips_; ARMXEmitter *emit; u32 compilerPC_; diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 7494d649f9..7ba16508a5 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -144,11 +144,66 @@ void Jit::Comp_FPULS(u32 op) } } +static const u64 GC_ALIGNED16(ssOneBits[2]) = {0x0000000100000001ULL, 0x0000000100000001ULL}; static const u64 GC_ALIGNED16(ssSignBits2[2]) = {0x8000000080000000ULL, 0x8000000080000000ULL}; static const u64 GC_ALIGNED16(ssNoSignMask[2]) = {0x7FFFFFFF7FFFFFFFULL, 0x7FFFFFFF7FFFFFFFULL}; +void Jit::Comp_FPUComp(u32 op) { + // TODO: Doesn't work yet. + DISABLE; + + // TODO: Compile this more efficiently by combining with the following branch, which usually is there. + // In that case, probably want to use COMISS rather than CMPSS. 
+ int fs = _FS; + int ft = _FT; + switch (op & 0xf) + { + case 0: //f + case 1: //un + case 8: //sf + case 9: //ngle + // cond = false; + MOV(32, M(&currentMIPS->fpcond), Imm32(0)); + break; + + case 2: //eq // fs == ft + case 10: //seq + case 3: //ueq + case 11: //ngl + fpr.BindToRegister(fs, true, false); + CMPSS(fpr.RX(fs), fpr.R(ft), 0); + ANDPS(fpr.RX(fs), M((void *)&ssOneBits)); + MOVSS(M(&currentMIPS->fpcond), fpr.RX(fs)); + break; + + case 4: //olt // fs < ft + case 5: //ult + case 12: //lt + case 13: //nge + fpr.BindToRegister(fs, true, false); + CMPSS(fpr.RX(fs), fpr.R(ft), 1); + ANDPS(fpr.RX(fs), M((void *)&ssOneBits)); + MOVSS(M(&currentMIPS->fpcond), fpr.RX(fs)); + break; + + case 6: //ole // NOTE(review): this emits ft < fs, but ole/ule/le/ngt presumably need fs <= ft (CMPSS predicate 2) - confirm before enabling + case 7: //ule + case 14: //le + case 15: //ngt + fpr.BindToRegister(ft, true, false); + CMPSS(fpr.RX(ft), fpr.R(fs), 1); + ANDPS(fpr.RX(ft), M((void *)&ssOneBits)); + MOVSS(M(&currentMIPS->fpcond), fpr.RX(ft)); + break; + + default: + _dbg_assert_msg_(CPU,0,"Trying to interpret FPUComp instruction that can't be interpreted"); + break; + } +} + void Jit::Comp_FPU2op(u32 op) { CONDITIONAL_DISABLE; int fs = _FS; diff --git a/ext/disarm.cpp b/ext/disarm.cpp index ffd3d756b5..a700847367 100644 --- a/ext/disarm.cpp +++ b/ext/disarm.cpp @@ -308,8 +308,8 @@ instr_disassemble(word instr, address addr, pDisOptions opts) { fpn = ((instr>>15)&1) + ((instr>>21)&2); - result.undefined = - result.badbits = + result.undefined = 0; + result.badbits = 0; result.oddbits = 0; result.is_SWI = 0; result.target_type = target_None;