Merge pull request #1230 from FioraAeterna/constaddr

JIT: improve handling of stores with a known address
This commit is contained in:
skidau 2014-11-05 12:40:38 +11:00
commit 0515ab852e
7 changed files with 194 additions and 161 deletions

View File

@ -186,8 +186,6 @@ void Jit64AsmRoutineManager::GenerateCommon()
GenFifoWrite(16);
fifoDirectWrite32 = AlignCode4();
GenFifoWrite(32);
fifoDirectWriteFloat = AlignCode4();
GenFifoFloatWrite();
frsqrte = AlignCode4();
GenFrsqrte();
fres = AlignCode4();

View File

@ -334,98 +334,54 @@ void Jit64::stX(UGeckoInstruction inst)
int s = inst.RS;
int a = inst.RA;
bool update = inst.OPCD & 1;
s32 offset = (s32)(s16)inst.SIMM_16;
if (a || !update)
bool update = (inst.OPCD & 1) && offset;
FALLBACK_IF(update);
if (!a && update)
PanicAlert("Invalid stX");
int accessSize;
switch (inst.OPCD & ~1)
{
int accessSize;
switch (inst.OPCD & ~1)
case 36: // stw
accessSize = 32;
break;
case 44: // sth
accessSize = 16;
break;
case 38: // stb
accessSize = 8;
break;
default:
_assert_msg_(DYNA_REC, 0, "stX: Invalid access size.");
return;
}
// If we already know the address of the write
if (!a || gpr.R(a).IsImm())
{
u32 addr = (a ? (u32)gpr.R(a).offset : 0) + offset;
bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse());
if (update)
{
case 36: // stw
accessSize = 32;
break;
case 44: // sth
accessSize = 16;
break;
case 38: // stb
accessSize = 8;
break;
default:
_assert_msg_(DYNA_REC, 0, "stX: Invalid access size.");
return;
}
if ((a == 0) || gpr.R(a).IsImm())
{
// If we already know the address through constant folding, we can do some
// fun tricks...
u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset);
addr += offset;
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
if (!js.memcheck || !exception)
{
// Helps external systems know which instruction triggered the write
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
MOV(32, R(RSCRATCH2), gpr.R(s));
if (update)
gpr.SetImmediate32(a, addr);
// No need to protect these, they don't touch any state
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
switch (accessSize)
{
case 8:
CALL((void *)asm_routines.fifoDirectWrite8);
break;
case 16:
CALL((void *)asm_routines.fifoDirectWrite16);
break;
case 32:
CALL((void *)asm_routines.fifoDirectWrite32);
break;
}
js.fifoBytesThisBlock += accessSize >> 3;
gpr.UnlockAllX();
return;
}
else if (Memory::IsRAMAddress(addr))
{
MOV(32, R(RSCRATCH), gpr.R(s));
WriteToConstRamAddress(accessSize, RSCRATCH, addr, true);
if (update)
gpr.SetImmediate32(a, addr);
return;
gpr.SetImmediate32(a, addr);
}
else
{
// Helps external systems know which instruction triggered the write
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 32:
ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr);
break;
case 16:
ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr);
break;
case 8:
ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (update)
gpr.SetImmediate32(a, addr);
return;
gpr.KillImmediate(a, true, true);
MEMCHECK_START(false)
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
}
}
else
{
gpr.Lock(a, s);
gpr.BindToRegister(a, true, false);
gpr.BindToRegister(a, true, update);
if (gpr.R(s).IsImm())
{
SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
@ -446,21 +402,14 @@ void Jit64::stX(UGeckoInstruction inst)
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
}
if (update && offset)
if (update)
{
MEMCHECK_START(false)
gpr.KillImmediate(a, true, true);
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
gpr.UnlockAll();
}
else
{
PanicAlert("Invalid stX");
}
gpr.UnlockAll();
}
void Jit64::stXx(UGeckoInstruction inst)

View File

@ -101,11 +101,50 @@ void Jit64::stfXXX(UGeckoInstruction inst)
int s = inst.RS;
int a = inst.RA;
int b = inst.RB;
s32 imm = (s16)inst.SIMM_16;
int accessSize = single ? 32 : 64;
FALLBACK_IF((!indexed && !a) || (update && js.memcheck && a == b));
FALLBACK_IF(update && js.memcheck && a == b);
if (single)
{
fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s));
MOVD_xmm(R(RSCRATCH), XMM0);
}
else
{
if (fpr.R(s).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
else
MOV(64, R(RSCRATCH), fpr.R(s));
}
if (!indexed && (!a || gpr.R(a).IsImm()))
{
u32 addr = (a ? (u32)gpr.R(a).offset : 0) + imm;
bool exception = WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());
if (update)
{
if (!js.memcheck || !exception)
{
gpr.SetImmediate32(a, addr);
}
else
{
gpr.KillImmediate(a, true, true);
MEMCHECK_START(false)
ADD(32, gpr.R(a), Imm32((u32)imm));
MEMCHECK_END
}
}
fpr.UnlockAll();
gpr.UnlockAll();
return;
}
s32 offset = 0;
s32 imm = (s16)inst.SIMM_16;
if (indexed)
{
if (update)
@ -140,21 +179,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
MOV(32, R(RSCRATCH2), gpr.R(a));
}
if (single)
{
fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s));
SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse());
fpr.UnlockAll();
}
else
{
if (fpr.R(s).IsSimpleReg())
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
else
MOV(64, R(RSCRATCH), fpr.R(s));
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse());
}
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse());
if (js.memcheck && update)
{
// revert the address change if an exception occurred
@ -162,6 +188,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm));
MEMCHECK_END
}
fpr.UnlockAll();
gpr.UnlockAll();
gpr.UnlockAllX();
}

View File

@ -22,31 +22,13 @@ static int temp32;
void CommonAsmRoutines::GenFifoWrite(int size)
{
// Assume value in RSCRATCH2
PUSH(ESI);
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2);
ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(ESI);
RET();
}
void CommonAsmRoutines::GenFifoFloatWrite()
{
// Assume value in XMM0
PUSH(ESI);
MOVSS(M(&temp32), XMM0);
MOV(32, R(RSCRATCH2), M(&temp32));
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2);
ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(ESI);
// Assume value in RSCRATCH
u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe;
_assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!");
MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH);
ADD(32, R(RSCRATCH2), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2));
RET();
}
@ -173,8 +155,8 @@ void CommonAsmRoutines::GenFres()
// Safe + Fast Quantizers, originally from JITIL by magumagu
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
static const float GC_ALIGNED16(m_quantizeTableS[]) =
{
@ -386,7 +368,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
// Easy!
const u8* storeSingleFloat = AlignCode4();
SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
MOVD_xmm(R(RSCRATCH), XMM0);
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET();
/*
if (cpu_info.bSSSE3)

View File

@ -13,7 +13,6 @@ public:
const u8 *fifoDirectWrite8;
const u8 *fifoDirectWrite16;
const u8 *fifoDirectWrite32;
const u8 *fifoDirectWriteFloat;
const u8 *enterCode;

View File

@ -422,6 +422,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
}
}
// Builds the immediate operand used when storing a constant with its bytes
// swapped. reg_value.offset supplies the constant; accessSize is the store
// width in bits.
//   32 -> Imm32 holding the 32-bit byte-swapped value
//   16 -> Imm16 holding the 16-bit byte-swapped value
//   anything else -> Imm8 holding the low byte (a single byte needs no swap)
static OpArg SwapImmediate(int accessSize, OpArg reg_value)
{
	switch (accessSize)
	{
	case 32:
		return Imm32(Common::swap32((u32)reg_value.offset));
	case 16:
		return Imm16(Common::swap16((u16)reg_value.offset));
	default:
		// Every other width is emitted as a truncated 8-bit immediate.
		return Imm8((u8)reg_value.offset);
	}
}
u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
u8* result = GetWritableCodePtr();
@ -429,14 +439,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
if (reg_value.IsImm())
{
if (swap)
{
if (accessSize == 32)
reg_value = Imm32(Common::swap32((u32)reg_value.offset));
else if (accessSize == 16)
reg_value = Imm16(Common::swap16((u16)reg_value.offset));
else
reg_value = Imm8((u8)reg_value.offset);
}
reg_value = SwapImmediate(accessSize, reg_value);
MOV(accessSize, dest, reg_value);
}
else if (swap)
@ -461,6 +464,68 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
return result;
}
// Emits a CALL to the shared fifoDirectWrite routine that matches accessSize
// (8, 16 or 32 bits) and adds the number of bytes written to
// js.fifoBytesThisBlock. For any other accessSize no CALL is emitted, but the
// byte counter is still advanced.
// NOTE(review): the routines presumably expect the value in RSCRATCH - confirm
// against GenFifoWrite.
void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
{
	// No need to protect these, they don't touch any state
	// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
	if (accessSize == 8)
		CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite8);
	else if (accessSize == 16)
		CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite16);
	else if (accessSize == 32)
		CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite32);

	// accessSize is in bits; the block-level counter is kept in bytes.
	const int bytesWritten = accessSize >> 3;
	jit->js.fifoBytesThisBlock += bytesWritten;
}
// Emits a store of `arg` (accessSize bits wide) to an address that is already
// known at compile time, picking one of three strategies:
//   1. Gather pipe window (0xCC008xxx) with jo.optimizeGatherPipe set and
//      accessSize <= 32: move the value into RSCRATCH and call the shared
//      fifo write routine.
//   2. RAM address: store directly via WriteToConstRamAddress.
//   3. Anything else: record the current PC and call the matching
//      Memory::Write_U* function, with registersInUse pushed and popped
//      around the call.
// Returns true only for case 3, i.e. when the emitted write goes through the
// generic memory handlers; cases 1 and 2 return false.
bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, BitSet32 registersInUse)
{
	// If we already know the address through constant folding, we can do some
	// fun tricks...
	const bool inGatherPipeWindow = (address & 0xFFFFF000) == 0xCC008000;
	if (inGatherPipeWindow && jit->jo.optimizeGatherPipe && accessSize <= 32)
	{
		// Skip the move when the value is already sitting in RSCRATCH.
		const bool alreadyInScratch = arg.IsSimpleReg() && arg.GetSimpleReg() == RSCRATCH;
		if (!alreadyInScratch)
			MOV(32, R(RSCRATCH), arg);

		UnsafeWriteGatherPipe(accessSize);
		return false;
	}

	if (Memory::IsRAMAddress(address))
	{
		WriteToConstRamAddress(accessSize, arg, address);
		return false;
	}

	// Helps external systems know which instruction triggered the write
	MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));

	ABI_PushRegistersAndAdjustStack(registersInUse, 0);
	switch (accessSize)
	{
	case 8:
		ABI_CallFunctionAC((void *)&Memory::Write_U8, arg, address);
		break;
	case 16:
		ABI_CallFunctionAC((void *)&Memory::Write_U16, arg, address);
		break;
	case 32:
		ABI_CallFunctionAC((void *)&Memory::Write_U32, arg, address);
		break;
	case 64:
		// NOTE(review): confirm ABI_CallFunctionAC passes the full 64 bits of
		// arg here; its operand width is not visible from this file.
		ABI_CallFunctionAC((void *)&Memory::Write_U64, arg, address);
		break;
	}
	ABI_PopRegistersAndAdjustStack(registersInUse, 0);
	return true;
}
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags)
{
// set the correct immediate format
@ -565,20 +630,30 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
SetJumpTarget(exit);
}
// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags)
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
{
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
MOVD_xmm(R(RSCRATCH), xmm_value);
SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags);
}
X64Reg reg;
if (arg.IsImm())
{
arg = SwapImmediate(accessSize, arg);
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
return;
}
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
{
if (swap)
SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
if (!arg.IsSimpleReg() || (!cpu_info.bMOVBE && swap && arg.GetSimpleReg() != RSCRATCH))
{
MOV(accessSize, R(RSCRATCH), arg);
reg = RSCRATCH;
}
else
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg));
{
reg = arg.GetSimpleReg();
}
if (swap)
SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), reg);
else
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(reg));
}
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)

View File

@ -87,6 +87,7 @@ public:
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
}
u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
void UnsafeWriteGatherPipe(int accessSize);
// Generate a load/write from the MMIO handler for a given address. Only
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
@ -116,9 +117,9 @@ public:
return swap && !cpu_info.bMOVBE && accessSize > 8;
}
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags = 0);
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true);
// returns true if an exception could have been caused
bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse);
void JitGetAndClearCAOV(bool oe);
void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode);