Jit unaligned reads/writes.

This mostly just improves performance in debug builds; the difference is barely noticeable in release builds.
This commit is contained in:
Unknown W. Brackets 2013-02-02 13:12:34 -08:00
parent bab7947be6
commit f777c872e6
3 changed files with 70 additions and 29 deletions

View File

@ -148,44 +148,70 @@ namespace MIPSComp
CompITypeMemWrite(op, 32, (void *) &Memory::Write_U32);
break;
case 134: //lwl
case 34: //lwl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(R(rt)) & (0x00ffffff >> shift) ) | ( mem << (24 - shift) );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for lwr rd, offset-3(rs) which makes a pair.
u32 desiredOp = ((op + (4 << 26)) & 0xFFFF0000) + (offset - 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// nextOp has the correct address.
CompITypeMemRead(nextOp, 32, &XEmitter::MOVZX, (void *) &Memory::Read_U32);
}
else
Comp_Generic(op);
}
break;
case 138: //lwr
case 38: //lwr
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(rt) & (0xffffff00 << (24 - shift)) ) | ( mem >> shift );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for lwl rd, offset+3(rs) which makes a pair.
u32 desiredOp = ((op - (4 << 26)) & 0xFFFF0000) + (offset + 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// op has the correct address.
CompITypeMemRead(op, 32, &XEmitter::MOVZX, (void *) &Memory::Read_U32);
}
else
Comp_Generic(op);
}
break;
case 142: //swl
case 42: //swl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) >> (24 - shift) ) ) |
// ( mem & (0xffffff00 << shift) ));
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for swr rd, offset-3(rs) which makes a pair.
u32 desiredOp = ((op + (4 << 26)) & 0xFFFF0000) + (offset - 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// nextOp has the correct address.
CompITypeMemWrite(nextOp, 32, (void *) &Memory::Write_U32);
}
else
Comp_Generic(op);
}
break;
case 146: //swr
case 46: //swr
{
Crash();
// u32 shift = (addr & 3) << 3;
// u32 mem = ReadMem32(addr & 0xfffffffc);
//
// WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) << shift ) |
// (mem & (0x00ffffff >> (24 - shift)) ) ) );
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Looking for swl rd, offset+3(rs) which makes a pair.
u32 desiredOp = ((op - (4 << 26)) & 0xFFFF0000) + (offset + 3);
if (!js.inDelaySlot && nextOp == desiredOp)
{
EatInstruction(nextOp);
// op has the correct address.
CompITypeMemWrite(op, 32, (void *) &Memory::Write_U32);
}
else
Comp_Generic(op);
}
break;
default:
Comp_Generic(op);
return ;

View File

@ -158,6 +158,18 @@ void Jit::CompileAt(u32 addr)
MIPSCompileOp(op);
}
// Accounts for an instruction that the caller has folded into the one currently
// being compiled: advances the compiler PC past it and adds it to the block's
// instruction count and cycle estimate.
//
// op is the encoded instruction being consumed, i.e. the one located at
// js.compilerPC + 4. It must not be a branch, and this must not be called while
// compiling a delay slot.
void Jit::EatInstruction(u32 op)
{
	const u32 opFlags = MIPSGetInfo(op);
	_dbg_assert_msg_(JIT, (opFlags & DELAYSLOT) == 0, "Never eat a branch op.");
	_dbg_assert_msg_(JIT, js.inDelaySlot == false, "Never eat an instruction inside a delayslot.");

	// Run the breakpoint check for the eaten instruction's address before
	// touching any compiler state.
	CheckJitBreakpoint(js.compilerPC + 4, 0);

	// Bookkeeping for the instruction we are skipping over.
	++js.numInstructions;
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Jit::Compile(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
@ -203,7 +215,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
gpr.Start(mips_, analysis);
fpr.Start(mips_, analysis);
int numInstructions = 0;
js.numInstructions = 0;
while (js.compiling)
{
// Jit breakpoints are quite fast, so let's do them in release too.
@ -215,13 +227,13 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
MIPSCompileOp(inst);
js.compilerPC += 4;
numInstructions++;
js.numInstructions++;
}
b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
NOP();
AlignCode4();
b->originalSize = numInstructions;
b->originalSize = js.numInstructions;
return b->normalEntry;
}

View File

@ -53,6 +53,7 @@ struct JitState
bool cancel;
bool inDelaySlot;
int downcountAmount;
int numInstructions;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
JitBlock *curBlock;
@ -107,8 +108,6 @@ public:
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void CompileAt(u32 addr);
void Comp_RunBlock(u32 op);
@ -151,6 +150,10 @@ private:
void FlushAll();
void WriteDowncount(int offset = 0);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void EatInstruction(u32 op);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInEAX();
// void WriteRfiExitDestInEAX();