JIT for frsqrte; not really a large difference, but drops it off a profile I'm looking at.



git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3671 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-07-04 00:48:09 +00:00
parent c52e4d281e
commit 1b936a4f71
5 changed files with 32 additions and 15 deletions

View File

@ -80,6 +80,11 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
Default(inst); return;
}
if (inst.SUBOP5 != 18 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21 &&
inst.SUBOP5 != 25) {
Default(inst); return;
}
// Only the interpreter has "proper" support for (some) FP flags
if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) {
Default(inst); return;
@ -91,12 +96,6 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
case 23: //sel
Default(inst);
break;
case 24: //res
Default(inst);
break;
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");

View File

@ -124,6 +124,7 @@ enum Opcode {
FSAdd,
FSSub,
FSNeg,
FSRSqrt,
FPAdd,
FPMul,
FPSub,
@ -444,6 +445,9 @@ public:
// Emits a unary FSNeg IR operation on op1.
// Delegates to FoldUOp and returns whatever InstLoc it produces.
InstLoc EmitFSNeg(InstLoc op1) {
return FoldUOp(FSNeg, op1);
}
// Emits a unary FSRSqrt IR operation on op1.
// Delegates to FoldUOp and returns whatever InstLoc it produces.
// NOTE(review): presumably a single-precision reciprocal square-root
// estimate, used for the PowerPC frsqrte instruction -- confirm against
// the backend lowering of FSRSqrt.
InstLoc EmitFSRSqrt(InstLoc op1) {
return FoldUOp(FSRSqrt, op1);
}
// Emits a binary FDMul IR operation on (op1, op2).
// Delegates to FoldBiOp and returns whatever InstLoc it produces.
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
return FoldBiOp(FDMul, op1, op2);
}

View File

@ -430,7 +430,8 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
#ifdef _M_IX86
return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset);
#else
return MComplex(RBX, baseReg, 1, offset + ProfileOffset);
LEA(32, EAX, MDisp(baseReg, offset + ProfileOffset));
return MComplex(RBX, EAX, 1, 0);
#endif
}
return MDisp(baseReg, offset);
@ -734,6 +735,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
case FSMul:
case FSAdd:
case FSSub:
case FSRSqrt:
case FDMul:
case FDAdd:
case FDSub:
@ -1370,6 +1372,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
fregEmitBinInst(RI, I, &Jit64::SUBSS);
break;
}
case FSRSqrt: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);
Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
break;
}
case FDMul: {
if (!thisUsed) break;
fregEmitBinInst(RI, I, &Jit64::MULSD);

View File

@ -31,24 +31,28 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(FloatingPoint)
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21)) {
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
inst.SUBOP5 != 21 && inst.SUBOP5 != 26)) {
Default(inst); return;
}
IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA);
switch (inst.SUBOP5)
{
case 25: //mul
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
break;
case 18: //div
case 20: //sub
val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB));
break;
case 21: //add
val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB));
break;
case 23: //sel
case 24: //res
case 25: //mul
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
break;
case 26: //rsqrte
val = ibuild.EmitLoadFReg(inst.FB);
val = ibuild.EmitDoubleToSingle(val);
val = ibuild.EmitFSRSqrt(val);
val = ibuild.EmitDupSingleToMReg(val);
break;
default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
}

View File

@ -484,7 +484,7 @@ static GekkoOPTemplate table63_2[] =
{22, Interpreter::fsqrtx, &Jit64::Default, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, Interpreter::fselx, &Jit64::Default, {"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, Interpreter::fmulx, &Jit64::fp_arith_s, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, Interpreter::frsqrtex,&Jit64::Default, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{26, Interpreter::frsqrtex,&Jit64::fp_arith_s, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, Interpreter::fmsubx, &Jit64::fmaddXX, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, Interpreter::fmaddx, &Jit64::fmaddXX, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, Interpreter::fnmsubx, &Jit64::fmaddXX, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},