Skip flushing FPU/VFPU regs if none were allocated.

FPU/VFPU registers are not used as often as GPRs, so this usually saves time:
about 1% during tests.
This commit is contained in:
Unknown W. Brackets 2014-03-29 20:34:17 -07:00
parent f55c81f096
commit 3001866d18
4 changed files with 45 additions and 8 deletions

View File

@ -15,6 +15,7 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <cstring>
#include "base/logging.h"
#include "Common/CPUDetect.h"
#include "Core/MIPS/ARM/ArmRegCacheFPU.h"
@ -22,7 +23,7 @@
using namespace ArmGen;
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips) : mips_(mips), vr(mr + 32) {
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips) : mips_(mips), vr(mr + 32), initialReady(false) {
if (cpu_info.bNEON) {
numARMFpuReg_ = 32;
} else {
@ -35,22 +36,33 @@ void ArmRegCacheFPU::Init(ARMXEmitter *emitter) {
}
// Resets the FPU register cache to its initial state.
// The initial tables (arInitial/mrInitial) are built once, on the first call,
// and every call then restores the working tables from them with two memcpy()s.
// `stats` is not used in the lines shown here.
void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
if (!initialReady) {
// First call only: initialReady is set to false by the constructor.
SetupInitialRegs();
initialReady = true;
}
// Restore the working tables from the initial snapshots.
memcpy(ar, arInitial, sizeof(ar));
memcpy(mr, mrInitial, sizeof(mr));
// Nothing has been mapped since the reset, so FlushAll() may return early
// until MapReg() or GetTempR() sets this flag again.
pendingFlush = false;
}
// Fills in the initial register tables that Start() copies into ar/mr.
// NOTE(review): this listing comes from a diff view without +/- markers, so the
// ar[]/mr[] writes below may be removed lines shown next to their
// arInitial[]/mrInitial[] replacements — confirm against the actual file.
void ArmRegCacheFPU::SetupInitialRegs() {
// ARM side: mipsReg = -1 (presumably "no MIPS register assigned") and not dirty.
for (int i = 0; i < numARMFpuReg_; i++) {
ar[i].mipsReg = -1;
ar[i].isDirty = false;
arInitial[i].mipsReg = -1;
arInitial[i].isDirty = false;
}
// MIPS side: location ML_MEM, no ARM register (INVALID_REG), no spill or temp lock.
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
mr[i].loc = ML_MEM;
mr[i].reg = INVALID_REG;
mr[i].spillLock = false;
mr[i].tempLock = false;
mrInitial[i].loc = ML_MEM;
mrInitial[i].reg = INVALID_REG;
mrInitial[i].spillLock = false;
mrInitial[i].tempLock = false;
}
}
static const ARMReg *GetMIPSAllocationOrder(int &count) {
// We reserve S0-S1 as scratch. Can afford two registers. Maybe even four, which could simplify some things.
static const ARMReg allocationOrder[] = {
S2, S3,
S2, S3,
S4, S5, S6, S7,
S8, S9, S10, S11,
S12, S13, S14, S15
@ -86,6 +98,7 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) {
}
ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
pendingFlush = true;
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
@ -329,6 +342,11 @@ int ArmRegCacheFPU::FlushGetSequential(int a, int maxArmReg) {
}
void ArmRegCacheFPU::FlushAll() {
if (!pendingFlush) {
// Nothing allocated. FPU regs are not nearly as common as GPR.
return;
}
// Discard temps!
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
DiscardR(i);
@ -393,6 +411,7 @@ void ArmRegCacheFPU::FlushAll() {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
pendingFlush = false;
}
void ArmRegCacheFPU::DiscardR(MIPSReg r) {
@ -432,6 +451,7 @@ bool ArmRegCacheFPU::IsTempX(ARMReg r) const {
}
int ArmRegCacheFPU::GetTempR() {
pendingFlush = true;
for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) {
if (mr[r].loc == ML_MEM && !mr[r].tempLock) {
mr[r].tempLock = true;

View File

@ -122,6 +122,8 @@ public:
int GetNumARMFPURegs();
private:
void SetupInitialRegs();
MIPSState *mips_;
ARMXEmitter *emit_;
u32 compilerPC_;
@ -136,4 +138,9 @@ private:
FPURegARM ar[MAX_ARMFPUREG];
FPURegMIPS mr[NUM_MIPSFPUREG];
FPURegMIPS *vr;
bool pendingFlush;
bool initialReady;
FPURegARM arInitial[MAX_ARMFPUREG];
FPURegMIPS mrInitial[NUM_MIPSFPUREG];
};

View File

@ -40,6 +40,7 @@ void FPURegCache::Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) {
memcpy(xregs, xregsInitial, sizeof(xregs));
memcpy(regs, regsInitial, sizeof(regs));
pendingFlush = false;
}
void FPURegCache::SetupInitialRegs() {
@ -115,6 +116,7 @@ void FPURegCache::ReleaseSpillLocks() {
}
void FPURegCache::MapReg(const int i, bool doLoad, bool makeDirty) {
pendingFlush = true;
_assert_msg_(JIT, !regs[i].location.IsImm(), "WTF - load - imm");
if (!regs[i].away) {
// Reg is at home in the memory register file. Let's pull it out.
@ -176,6 +178,7 @@ bool FPURegCache::IsTempX(X64Reg xr) {
}
int FPURegCache::GetTempR() {
pendingFlush = true;
for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) {
if (!regs[r].away && !regs[r].tempLocked) {
regs[r].tempLocked = true;
@ -188,6 +191,9 @@ int FPURegCache::GetTempR() {
}
void FPURegCache::Flush() {
if (!pendingFlush) {
return;
}
for (int i = 0; i < NUM_MIPS_FPRS; i++) {
if (regs[i].locked) {
PanicAlert("Somebody forgot to unlock MIPS reg %i.", i);
@ -204,6 +210,7 @@ void FPURegCache::Flush() {
}
}
}
pendingFlush = false;
}
OpArg FPURegCache::GetDefaultLocation(int reg) const {
@ -244,6 +251,7 @@ const int *FPURegCache::GetAllocationOrder(int &count) {
}
X64Reg FPURegCache::GetFreeXReg() {
pendingFlush = true;
int aCount;
const int *aOrder = GetAllocationOrder(aCount);
for (int i = 0; i < aCount; i++) {
@ -284,4 +292,5 @@ void FPURegCache::GetState(FPURegCacheState &state) const {
// Overwrites the current regs/xregs tables with the contents of `state`.
// `state` is taken by value, so the whole struct is copied on each call.
void FPURegCache::RestoreState(const FPURegCacheState state) {
memcpy(regs, state.regs, sizeof(regs));
memcpy(xregs, state.xregs, sizeof(xregs));
// Set unconditionally so the next Flush() does its full pass instead of
// returning early — presumably because the restored tables may hold mapped
// registers; confirm against callers.
pendingFlush = true;
}

View File

@ -163,6 +163,7 @@ private:
X64CachedFPReg xregs[NUM_X_FPREGS];
MIPSCachedFPReg *vregs;
bool pendingFlush;
bool initialReady;
MIPSCachedFPReg regsInitial[NUM_MIPS_FPRS];
X64CachedFPReg xregsInitial[NUM_X_FPREGS];