From 1b936a4f713885d0b77eec3c4366d72f8ed36b10 Mon Sep 17 00:00:00 2001 From: magumagu9 Date: Sat, 4 Jul 2009 00:48:09 +0000 Subject: [PATCH] JIT for frsqrte; not really a large difference, but drops it off a profile I'm looking at. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3671 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 11 +++++------ Source/Core/Core/Src/PowerPC/Jit64IL/IR.h | 4 ++++ .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 12 +++++++++++- .../Src/PowerPC/Jit64IL/Jit_FloatingPoint.cpp | 18 +++++++++++------- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 2 +- 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 1c47a2eb12..04b82858ff 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -80,6 +80,11 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) Default(inst); return; } + if (inst.SUBOP5 != 18 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21 && + inst.SUBOP5 != 25) { + Default(inst); return; + } + // Only the interpreter has "proper" support for (some) FP flags if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) { Default(inst); return; @@ -91,12 +96,6 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add - case 23: //sel - Default(inst); - break; - case 24: //res - Default(inst); - break; case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul default: _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index 92279da7cc..7e93c8aaaf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -124,6 +124,7 @@ enum Opcode { FSAdd, FSSub, FSNeg, + FSRSqrt, FPAdd, FPMul, FPSub, @@ -444,6 +445,9 @@ public: InstLoc EmitFSNeg(InstLoc op1) { return FoldUOp(FSNeg, op1); } + InstLoc EmitFSRSqrt(InstLoc op1) { + return FoldUOp(FSRSqrt, op1); + } InstLoc EmitFDMul(InstLoc op1, InstLoc op2) { return FoldBiOp(FDMul, op1, op2); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index af2fceb2e2..a93e750e4f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -430,7 +430,8 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, #ifdef _M_IX86 return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset); #else - return MComplex(RBX, baseReg, 1, offset + ProfileOffset); + LEA(32, EAX, MDisp(baseReg, offset + ProfileOffset)); + return MComplex(RBX, EAX, 1, 0); #endif } return MDisp(baseReg, offset); @@ -734,6 +735,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak case FSMul: case FSAdd: case FSSub: + case FSRSqrt: case FDMul: case FDAdd: case FDSub: @@ -1370,6 +1372,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak fregEmitBinInst(RI, I, &Jit64::SUBSS); break; } + case FSRSqrt: { + if (!thisUsed) break; + X64Reg reg = fregFindFreeReg(RI); + Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I))); + RI.fregs[reg] = I; + fregNormalRegClear(RI, I); + break; + } case FDMul: { if (!thisUsed) break; fregEmitBinInst(RI, I, &Jit64::MULSD); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_FloatingPoint.cpp index 38ae196cde..d69f65dfa5 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_FloatingPoint.cpp @@ -31,24 +31,28 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(FloatingPoint) - if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21)) { + if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && + inst.SUBOP5 != 21 && inst.SUBOP5 != 26)) { Default(inst); return; } IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); switch (inst.SUBOP5) { - case 25: //mul - val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); - break; - case 18: //div case 20: //sub val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB)); break; case 21: //add val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB)); break; - case 23: //sel - case 24: //res + case 25: //mul + val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); + break; + case 26: //rsqrte + val = ibuild.EmitLoadFReg(inst.FB); + val = ibuild.EmitDoubleToSingle(val); + val = ibuild.EmitFSRSqrt(val); + val = ibuild.EmitDupSingleToMReg(val); + break; default: _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); } diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index 798cc35147..0eeb1b57bd 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -484,7 +484,7 @@ static GekkoOPTemplate table63_2[] = {22, Interpreter::fsqrtx, &Jit64::Default, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, Interpreter::fselx, &Jit64::Default, {"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, Interpreter::fmulx, &Jit64::fp_arith_s, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, Interpreter::frsqrtex,&Jit64::Default, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, + {26, Interpreter::frsqrtex,&Jit64::fp_arith_s, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {28, Interpreter::fmsubx, &Jit64::fmaddXX, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, Interpreter::fmaddx, &Jit64::fmaddXX, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, Interpreter::fnmsubx, &Jit64::fmaddXX, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},