Non-optimal vmmul for x86 jit.

It's faster than the interpreter anyway, but it could be much better.
This commit is contained in:
Unknown W. Brackets 2013-04-20 01:11:40 -07:00
parent cfac7324d6
commit 29109d25af
5 changed files with 58 additions and 4 deletions

View File

@ -718,4 +718,8 @@ namespace MIPSComp
DISABLE; DISABLE;
} }
// Stub for the vmmul compile hook: no native code is emitted here; the body
// consists solely of the DISABLE macro.
// NOTE(review): DISABLE presumably hands the op to the generic fallback and
// returns - confirm against the macro's definition.
void Jit::Comp_Vmmul(u32 op) {
DISABLE;
}
} }

View File

@ -194,6 +194,7 @@ public:
void Comp_Vmtvc(u32 op); void Comp_Vmtvc(u32 op);
void Comp_Vmmov(u32 op); void Comp_Vmmov(u32 op);
void Comp_VScl(u32 op); void Comp_VScl(u32 op);
void Comp_Vmmul(u32 op);
ArmJitBlockCache *GetBlockCache() { return &blocks; } ArmJitBlockCache *GetBlockCache() { return &blocks; }

View File

@ -626,10 +626,10 @@ MIPSInstruction tableVFPU5[8] = //110111 xxx
const MIPSInstruction tableVFPU6[32] = //111100 xxx const MIPSInstruction tableVFPU6[32] = //111100 xxx
{ {
//0 //0
INSTR("vmmul",&Jit::Comp_Generic, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX), INSTR("vmmul",&Jit::Comp_Vmmul, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vmmul",&Jit::Comp_Generic, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX), INSTR("vmmul",&Jit::Comp_Vmmul, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vmmul",&Jit::Comp_Generic, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX), INSTR("vmmul",&Jit::Comp_Vmmul, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vmmul",&Jit::Comp_Generic, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX), INSTR("vmmul",&Jit::Comp_Vmmul, Dis_MatrixMult, Int_Vmmul, IS_VFPU|OUT_EAT_PREFIX),
INSTR("v(h)tfm2",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX), INSTR("v(h)tfm2",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX),
INSTR("v(h)tfm2",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX), INSTR("v(h)tfm2",&Jit::Comp_Generic, Dis_Vtfm, Int_Vtfm, IS_VFPU|OUT_EAT_PREFIX),

View File

@ -783,4 +783,52 @@ void Jit::Comp_VScl(u32 op) {
fpr.ReleaseSpillLocks(); fpr.ReleaseSpillLocks();
} }
// Emits x86 code for the VFPU "vmmul" (matrix multiply) instruction.
//
// For every destination slot (i, j), with 0 <= i, j < side, the generated
// code evaluates
//     dst[i * 4 + j] = sum over k of srcS[j * 4 + k] * srcT[i * 4 + k]
// using scalar SSE ops: XMM0 is the accumulator and XMM1 is scratch.
//
// Every result is first parked in a temporary V register and only copied to
// the destination in a second pass, so no destination register is written
// before all source reads have been emitted.
void Jit::Comp_Vmmul(u32 op) {
	CONDITIONAL_DISABLE;

	// TODO: This probably ignores prefixes?
	if (js.MayHavePrefix())
		DISABLE;

	const MatrixSize mtxSize = GetMtxSize(op);
	const int side = GetMatrixSide(mtxSize);

	// Register lists use a fixed stride of 4 per major index (16 slots).
	u8 srcS[16];
	u8 srcT[16];
	u8 dst[16];
	GetMatrixRegs(srcS, mtxSize, _VS);
	GetMatrixRegs(srcT, mtxSize, _VT);
	GetMatrixRegs(dst, mtxSize, _VD);

	// TODO: test overlap, fix non-optimal.
	u8 staged[16];

	// Pass 1: compute each dot product into XMM0 and stash it in a temp.
	for (int i = 0; i < side; ++i) {
		const int tBase = i * 4;
		for (int j = 0; j < side; ++j) {
			const int sBase = j * 4;

			// Clear the accumulator.
			XORPS(XMM0, R(XMM0));
			for (int k = 0; k < side; ++k) {
				MOVSS(XMM1, fpr.V(srcS[sBase + k]));
				MULSS(XMM1, fpr.V(srcT[tBase + k]));
				ADDSS(XMM0, R(XMM1));
			}

			const u8 tempReg = static_cast<u8>(fpr.GetTempV());
			fpr.MapRegV(tempReg, MAP_NOINIT | MAP_DIRTY);
			MOVSS(fpr.VX(tempReg), R(XMM0));
			// NOTE(review): presumably writes the value back so the temp no
			// longer pins an x86 register - confirm against the reg cache.
			fpr.StoreFromRegisterV(tempReg);
			staged[tBase + j] = tempReg;
		}
	}

	// Pass 2: move every staged result into its destination register.
	for (int i = 0; i < side; ++i) {
		for (int j = 0; j < side; ++j) {
			const int slot = i * 4 + j;
			const u8 tempReg = staged[slot];
			fpr.MapRegV(tempReg, 0);
			MOVSS(fpr.V(dst[slot]), fpr.VX(tempReg));
		}
	}

	fpr.ReleaseSpillLocks();
}
} }

View File

@ -204,6 +204,7 @@ public:
void Comp_Vmtvc(u32 op); void Comp_Vmtvc(u32 op);
void Comp_Vmmov(u32 op); void Comp_Vmmov(u32 op);
void Comp_VScl(u32 op); void Comp_VScl(u32 op);
void Comp_Vmmul(u32 op);
void Comp_DoNothing(u32 op); void Comp_DoNothing(u32 op);