Fix numerous ARM JIT bugs. Activate vmtvc and vscl, and vadd/vmul/vdiv/vsub for real this time.

This commit is contained in:
Henrik Rydgard 2013-07-31 10:33:44 +02:00
parent 02e3315981
commit 51596b636a
11 changed files with 95 additions and 67 deletions

View File

@ -265,8 +265,12 @@ void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
}
void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) {
MOVI2R(reg, (u32)(func));
BL(reg);
if (BLInRange(func)) {
BL(func);
} else {
MOVI2R(reg, (u32)(func));
BL(reg);
}
}
void ARMXEmitter::SetCodePtr(u8 *ptr)
@ -433,6 +437,14 @@ void ARMXEmitter::B(ARMReg src)
Write32(condition | 0x12FFF10 | src);
}
// Reports whether fnptr can be called with a direct (immediate-offset) BL from
// the position given by `code` (presumably the current emit pointer -- confirm),
// so the caller can avoid loading the target address into a register first.
//
// The ARM-mode BL immediate is a signed 24-bit word offset, measured from the
// instruction address plus 8. That makes the encodable byte distances
// -33554432 .. +33554428. A distance of exactly +33554432 does not fit: its
// word offset (0x800000) sets the sign bit of the 24-bit field and would be
// executed as a backwards branch, so it must be reported as out of range.
// The lower bound stays exclusive, as before, so this check never accepts a
// distance the previous version rejected.
bool ARMXEmitter::BLInRange(const void *fnptr) {
	// Offsets are relative to the address of the branch instruction plus 8.
	s32 distance = (s32)fnptr - (s32(code) + 8);
	return distance > -33554432 && distance < 33554432;
}
void ARMXEmitter::BL(const void *fnptr)
{
s32 distance = (s32)fnptr - (s32(code) + 8);

View File

@ -431,6 +431,7 @@ public:
void B (ARMReg src);
void BL(const void *fnptr);
void BL(ARMReg src);
bool BLInRange(const void *fnptr);
void PUSH(const int num, ...);
void POP(const int num, ...);

View File

@ -29,6 +29,13 @@
#include "ArmRegCache.h"
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
@ -38,13 +45,6 @@
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
namespace MIPSComp
{
// Vector regs can overlap in all sorts of swizzled ways.
@ -73,13 +73,12 @@ namespace MIPSComp
void Jit::Comp_VPFX(u32 op)
{
// logBlocks = 1;
CONDITIONAL_DISABLE;
int data = op & 0xFFFFF;
int regnum = (op >> 24) & 3;
switch (regnum) {
case 0: // S
//ERROR_LOG(CPU, "VPFX - S %08x %i", data, regnum);
js.prefixS = data;
js.prefixSFlag = ArmJitState::PREFIX_KNOWN_DIRTY;
break;
@ -91,6 +90,9 @@ namespace MIPSComp
js.prefixD = data;
js.prefixDFlag = ArmJitState::PREFIX_KNOWN_DIRTY;
break;
default:
ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data);
break;
}
}
@ -124,7 +126,7 @@ namespace MIPSComp
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
// TODO: But some ops seem to use const 0 instead?
if (regnum >= n) {
WARN_LOG(CPU, "Invalid VFPU swizzle: %08x: %d / %d", prefix, regnum, sz);
WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
regnum = 0;
}
@ -559,11 +561,9 @@ namespace MIPSComp
VMOV(fpr.V(vd), S0);
}
void Jit::Comp_VecDo3(u32 op)
{
void Jit::Comp_VecDo3(u32 op) {
CONDITIONAL_DISABLE;
DISABLE; // breaks tekken 6 ... I don't understand how
// WARNING: No prefix support!
if (js.MayHavePrefix()) {
DISABLE;
@ -623,15 +623,13 @@ namespace MIPSComp
}
for (int i = 0; i < n; i++) {
fpr.MapRegV(tempregs[i], MAP_NOINIT);
fpr.SpillLockV(tempregs[i]);
fpr.MapInInV(sregs[i], tregs[i]);
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
(this->*triop)(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
}
for (int i = 0; i < n; i++) {
if (dregs[i] != tempregs[i]) {
fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
fpr.MapDirtyInV(dregs[i], tempregs[i]);
VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
}
}
@ -643,8 +641,6 @@ namespace MIPSComp
void Jit::Comp_VV2Op(u32 op) {
CONDITIONAL_DISABLE;
// DISABLE;
if (js.MayHavePrefix()) {
//if (js.HasUnknownPrefix()) {
DISABLE;
@ -663,24 +659,18 @@ namespace MIPSComp
GetVectorRegsPrefixD(dregs, sz, _VD);
MIPSReg tempregs[4];
for (int i = 0; i < n; ++i)
{
if (!IsOverlapSafe(dregs[i], i, n, sregs))
{
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
tempregs[i] = fpr.GetTempV();
}
else
{
} else {
tempregs[i] = dregs[i];
}
}
// Warning: sregs[i] and tempregs[i] may be the same reg.
// Helps for vmov, hurts for vrcp, etc.
for (int i = 0; i < n; ++i)
{
switch ((op >> 16) & 0x1f)
{
for (int i = 0; i < n; ++i) {
switch ((op >> 16) & 0x1f) {
case 0: // d[i] = s[i]; break; //vmov
// Probably for swizzle.
fpr.MapDirtyInV(tempregs[i], sregs[i]);
@ -804,24 +794,26 @@ namespace MIPSComp
}
break;
case 7: //mtv
case 7: // mtv
if (imm < 128) {
gpr.FlushR(rt);
fpr.MapRegV(imm, MAP_DIRTY | MAP_NOINIT);
VLDR(fpr.V(imm), CTXREG, gpr.GetMipsRegOffset(rt));
} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc //currentMIPS->vfpuCtrl[imm - 128] = R(rt);
DISABLE;
gpr.MapReg(rt);
STR(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128));
//gpr.BindToRegister(rt, true, false);
//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
// TODO: Optimization if rt is Imm?
//if (imm - 128 == VFPU_CTRL_SPREFIX) {
//js.prefixSFlag = JitState::PREFIX_UNKNOWN;
//} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
// js.prefixTFlag = JitState::PREFIX_UNKNOWN;
//} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
// js.prefixDFlag = JitState::PREFIX_UNKNOWN;
//}
// Set these BEFORE disable!
if (imm - 128 == VFPU_CTRL_SPREFIX) {
js.prefixSFlag = ArmJitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
js.prefixTFlag = ArmJitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
js.prefixDFlag = ArmJitState::PREFIX_UNKNOWN;
}
} else {
//ERROR
_dbg_assert_msg_(CPU,0,"mtv - invalid register");
@ -831,17 +823,18 @@ namespace MIPSComp
default:
DISABLE;
}
fpr.ReleaseSpillLocksAndDiscardTemps();
}
void Jit::Comp_Vmtvc(u32 op) {
CONDITIONAL_DISABLE;
DISABLE;
int vs = _VS;
int imm = op & 0xFF;
if (imm >= 128 && imm < 128 + VFPU_CTRL_MAX) {
fpr.MapRegV(vs, 0);
ADD(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[0]) + (imm - 128) * 4);
fpr.MapRegV(vs);
ADDI2R(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[0]) + (imm - 128) * 4, R1);
VSTR(fpr.V(vs), R0, 0);
fpr.ReleaseSpillLocksAndDiscardTemps();
@ -864,7 +857,7 @@ namespace MIPSComp
}
if (_VS == _VD) {
// A lot of these in Wipeout...
// A lot of these in Wipeout... Just drop the instruction entirely.
return;
}
@ -898,18 +891,17 @@ namespace MIPSComp
void Jit::Comp_VScl(u32 op) {
CONDITIONAL_DISABLE;
// Still troublesome! WTF!
DISABLE;
if (js.MayHavePrefix())
if (js.MayHavePrefix()) {
DISABLE; // broken!
}
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4], treg;
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(&treg, V_Single, _VT);
GetVectorRegs(&treg, V_Single, _VT);
GetVectorRegsPrefixD(dregs, sz, _VD);
// Move to S0 early, so we don't have to worry about overlap with scale.
@ -919,7 +911,7 @@ namespace MIPSComp
// and that there's no overlap.
MIPSReg tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
if (dregs[i] == treg || !IsOverlapSafe(dregs[i], i, n, sregs)) {
// Need to use temp regs
tempregs[i] = fpr.GetTempV();
} else {
@ -930,13 +922,12 @@ namespace MIPSComp
// The meat of the function!
for (int i = 0; i < n; i++) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
fpr.SpillLockV(tempregs[i]);
VMUL(fpr.V(tempregs[i]), fpr.V(sregs[i]), S0);
}
for (int i = 0; i < n; i++) {
// All must be mapped for prefixes to work.
if (tempregs[i] != dregs[i]) {
if (dregs[i] != tempregs[i]) {
fpr.MapDirtyInV(dregs[i], tempregs[i]);
VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
}

View File

@ -94,6 +94,8 @@ void Jit::FlushPrefixV()
{
if ((js.prefixSFlag & ArmJitState::PREFIX_DIRTY) != 0)
{
//if (js.prefixS & 0xF0000000)
// ERROR_LOG(CPU, "Flushing BAD S-flag prefix: %08x", js.prefixS);
MOVI2R(R0, js.prefixS);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
js.prefixSFlag = (ArmJitState::PrefixState) (js.prefixSFlag & ~ArmJitState::PREFIX_DIRTY);
@ -101,6 +103,8 @@ void Jit::FlushPrefixV()
if ((js.prefixTFlag & ArmJitState::PREFIX_DIRTY) != 0)
{
//if (js.prefixT & 0xF0000000)
// ERROR_LOG(CPU, "Flushing BAD T-flag prefix: %08x", js.prefixT);
MOVI2R(R0, js.prefixT);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
js.prefixTFlag = (ArmJitState::PrefixState) (js.prefixTFlag & ~ArmJitState::PREFIX_DIRTY);
@ -108,6 +112,8 @@ void Jit::FlushPrefixV()
if ((js.prefixDFlag & ArmJitState::PREFIX_DIRTY) != 0)
{
//if (js.prefixD & 0xF0000000)
// ERROR_LOG(CPU, "Flushing BAD D-flag prefix: %08x", js.prefixD);
MOVI2R(R0, js.prefixD);
STR(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
js.prefixDFlag = (ArmJitState::PrefixState) (js.prefixDFlag & ~ArmJitState::PREFIX_DIRTY);
@ -288,9 +294,13 @@ void Jit::Comp_Generic(u32 op)
RestoreDowncount();
}
// Might have eaten prefixes, hard to tell...
if ((MIPSGetInfo(op) & IS_VFPU) != 0)
js.PrefixStart();
const int info = MIPSGetInfo(op);
if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
{
// If it does eat them, it'll happen in MIPSCompileOp().
if ((info & OUT_EAT_PREFIX) == 0)
js.PrefixUnknown();
}
}
void Jit::MovFromPC(ARMReg r) {

View File

@ -70,10 +70,9 @@ struct ArmJitState
PrefixState prefixTFlag;
PrefixState prefixDFlag;
void PrefixStart() {
PrefixUnknown();
if (startDefaultPrefix) {
EatPrefix();
} else {
PrefixUnknown();
}
}
void PrefixUnknown() {
@ -116,10 +115,12 @@ struct ArmJitState
prefixD = 0x0;
}
}
// Extracts the four bits at positions 8..11 of prefixD (by name, the VFPU
// destination write mask -- one bit per vector lane, presumably).
// Asserts that prefixDFlag has the PREFIX_KNOWN bit set before reading it.
u8 VfpuWriteMask() const {
	_assert_(prefixDFlag & PREFIX_KNOWN);
	// Shift the field down to bit 0, then keep only its low nibble.
	return static_cast<u8>((prefixD >> 8) & 0xF);
}
bool VfpuWriteMask(int i) const {
_assert_(prefixDFlag & PREFIX_KNOWN);
return (prefixD >> (8 + i)) & 1;

View File

@ -110,7 +110,7 @@ allocate:
int bestToSpill = -1;
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i] - S0;
if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock)
if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock))
continue;
bestToSpill = reg;
break;
@ -172,7 +172,7 @@ void ArmRegCacheFPU::MapRegV(int vreg, int flags) {
void ArmRegCacheFPU::LoadToRegV(ARMReg armReg, int vreg) {
if (vr[vreg].loc == ML_ARMREG) {
emit_->VMOV(armReg, vr[vreg].reg);
emit_->VMOV(armReg, (ARMReg)(S0 + vr[vreg].reg));
} else {
MapRegV(vreg);
emit_->VMOV(armReg, V(vreg));
@ -325,6 +325,7 @@ int ArmRegCacheFPU::GetTempR() {
}
}
ERROR_LOG(CPU, "Out of temp regs! Might need to DiscardR() some");
_assert_msg_(DYNA_REC, 0, "Regcache ran out of temp regs, might need to DiscardR() some.");
return -1;
}
@ -347,7 +348,7 @@ void ArmRegCacheFPU::FlushAll() {
}
int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs".
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
if (r < 32 + 128 + NUM_TEMPS)
return (r + 32) << 2;
ERROR_LOG(JIT, "bad mips register %i, out of range", r);

View File

@ -98,6 +98,8 @@ public:
float v[128];
u32 vi[128];
};
// Temps don't get flushed so we don't reserve space for them.
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
u32 vfpuCtrl[16];
u32 pc;

View File

@ -132,9 +132,11 @@ void ApplyPrefixST(float *v, u32 data, VectorSize size)
{
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
// TODO: But some ops seem to use const 0 instead?
if (regnum >= n)
{
ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", data, size);
if (regnum >= n) {
ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x: %i / %d at PC = %08x (%s)", data, regnum, n, currentMIPS->pc, currentMIPS->DisasmAt(currentMIPS->pc));
//for (int i = 0; i < 12; i++) {
// ERROR_LOG(CPU, " vfpuCtrl[%i] = %08x", i, currentMIPS->vfpuCtrl[i]);
//}
regnum = 0;
}

View File

@ -954,6 +954,8 @@ void Jit::Comp_Vmmov(u32 op) {
void Jit::Comp_VScl(u32 op) {
CONDITIONAL_DISABLE;
ERROR_LOG(CPU, "vscl @ %08x", js.compilerPC);
if (js.HasUnknownPrefix())
DISABLE;
@ -972,7 +974,7 @@ void Jit::Comp_VScl(u32 op) {
X64Reg tempxregs[4];
for (int i = 0; i < n; ++i)
{
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
if (dregs[i] != scale || !IsOverlapSafeAllowS(dregs[i], i, n, sregs))
{
int reg = fpr.GetTempV();
fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);

View File

@ -280,6 +280,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
js.numInstructions = 0;
while (js.compiling)
{
if (js.prefixS & 0xF0000000) {
ERROR_LOG(CPU, "GARBAGE prefix S : %08x at %08x : %s", js.prefixS, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
}
// Jit breakpoints are quite fast, so let's do them in release too.
CheckJitBreakpoint(js.compilerPC, 0);
@ -311,6 +314,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
js.compilerPC += 4;
js.numInstructions++;
}
if (js.prefixS & 0xF0000000) {
ERROR_LOG(CPU, "GARBAGE prefix S : %08x at %08x : %s", js.prefixS, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
}
b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
NOP();

View File

@ -82,11 +82,11 @@ struct JitState
PrefixState prefixSFlag;
PrefixState prefixTFlag;
PrefixState prefixDFlag;
void PrefixStart() {
PrefixUnknown();
if (startDefaultPrefix) {
EatPrefix();
} else {
PrefixUnknown();
}
}
void PrefixUnknown() {