Reorder VFPU data in saved kernel thread contexts when loading an old save state in a new version.

This commit is contained in:
Henrik Rydgard 2013-11-28 00:08:45 +01:00
parent 55500d4bb6
commit f9f6e9492d
3 changed files with 37 additions and 9 deletions

View File

@ -520,7 +520,7 @@ public:
virtual void DoState(PointerWrap &p)
{
auto s = p.Section("Thread", 1, 3);
auto s = p.Section("Thread", 1, 4);
if (!s)
return;
@ -533,6 +533,18 @@ public:
// TODO: How do I "version" adding a DoState method to ThreadContext?
p.Do(context);
if (s <= 3)
{
// We must have been loading an old state if we're here.
// Reorder VFPU data to new order.
float temp[128];
memcpy(temp, context.v, 128 * sizeof(float));
for (int i = 0; i < 128; i++) {
context.v[voffset[i]] = temp[i];
}
}
if (s <= 2)
{
context.other[4] = context.other[5];

View File

@ -112,6 +112,11 @@ MIPSState::MIPSState()
// 0x08 0x09 0x0a 0x0b
// ....
// This is because the original indices look like this:
// 0XXMMMYY where M is the matrix number.
// We will now map 0XXMMMYY to 0MMMYYXX.
// Advantages:
// * Columns can be flushed and reloaded faster "at once"
// * 4x4 matrices are contiguous in RAM, so they too can be loaded quickly with NEON
@ -119,12 +124,12 @@ MIPSState::MIPSState()
// Disadvantages:
// * Extra indirection, can be confusing and slower (interpreter only)
// * Flushing and reloading row registers is now slower
int i = 0;
for (int m = 0; m < 8; m++) {
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
voffset[i++] = m * 4 + x * 32 + y;
voffset[m * 4 + x * 32 + y] = i++;
}
}
}
@ -134,6 +139,13 @@ MIPSState::MIPSState()
fromvoffset[voffset[i]] = i;
}
// Sanity check that things that should be ordered are ordered.
const int firstEight[8] = { 0x0, 0x20, 0x40, 0x60, 0x1, 0x21, 0x41, 0x61 };
for (int i = 0; i < 8; i++) {
if (voffset[firstEight[i]] != i) {
ERROR_LOG(CPU, "Wrong voffset order! %i: %i should have been %i", firstEight[i], voffset[firstEight[i]], i);
}
}
}
MIPSState::~MIPSState()

View File

@ -32,8 +32,10 @@ FPURegCache::FPURegCache() : mips(0), initialReady(false), emit(0) {
void FPURegCache::Start(MIPSState *mips, MIPSAnalyst::AnalysisResults &stats) {
this->mips = mips;
if (!initialReady)
if (!initialReady) {
SetupInitialRegs();
initialReady = true;
}
memcpy(xregs, xregsInitial, sizeof(xregs));
memcpy(regs, regsInitial, sizeof(regs));
@ -50,12 +52,14 @@ void FPURegCache::SetupInitialRegs() {
regsInitial[i].location = base;
base.IncreaseOffset(sizeof(float));
}
base = GetDefaultLocation(32);
for (int i = 32; i < NUM_MIPS_FPRS; i++) {
for (int i = 32; i < 32 + 128; i++) {
regsInitial[i].location = GetDefaultLocation(i);
}
base = GetDefaultLocation(32 + 128);
for (int i = 32 + 128; i < NUM_MIPS_FPRS; i++) {
regsInitial[i].location = base;
base.IncreaseOffset(sizeof(float));
}
initialReady = true;
}
void FPURegCache::SpillLock(int p1, int p2, int p3, int p4) {
@ -264,9 +268,9 @@ X64Reg FPURegCache::GetFreeXReg() {
}
void FPURegCache::FlushX(X64Reg reg) {
if (reg >= NUM_X_FPREGS)
if (reg >= NUM_X_FPREGS) {
PanicAlert("Flushing non existent reg");
else if (xregs[reg].mipsReg != -1) {
} else if (xregs[reg].mipsReg != -1) {
StoreFromRegister(xregs[reg].mipsReg);
}
}