Merge pull request #8732 from unknownbrackets/ir-jit

Add a pass to remove temporary regs
This commit is contained in:
Henrik Rydgård 2016-05-13 10:11:13 +02:00
commit 49b133971c
8 changed files with 259 additions and 42 deletions

View File

@ -363,7 +363,7 @@ void IRFrontend::Comp_JumpReg(MIPSOpcode op) {
break;
}
ir.Write(IROp::ExitToReg, destReg, 0, 0);
ir.Write(IROp::ExitToReg, 0, destReg, 0);
js.compiling = false;
}

View File

@ -85,7 +85,7 @@ namespace MIPSComp {
return IsOverlapSafeAllowS(dreg, -1, sn, sregs, tn, tregs);
}
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
CONDITIONAL_DISABLE;
int data = op & 0xFFFFF;
int regnum = (op >> 24) & 3;
@ -223,6 +223,7 @@ namespace MIPSComp {
}
void IRFrontend::Comp_SV(MIPSOpcode op) {
CONDITIONAL_DISABLE;
s32 offset = (signed short)(op & 0xFFFC);
int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
MIPSGPReg rs = _RS;
@ -241,6 +242,7 @@ namespace MIPSComp {
}
void IRFrontend::Comp_SVQ(MIPSOpcode op) {
CONDITIONAL_DISABLE;
int imm = (signed short)(op & 0xFFFC);
int vt = (((op >> 16) & 0x1f)) | ((op & 1) << 5);
MIPSGPReg rs = _RS;
@ -280,6 +282,7 @@ namespace MIPSComp {
}
void IRFrontend::Comp_VVectorInit(MIPSOpcode op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
}
@ -289,37 +292,55 @@ namespace MIPSComp {
int vd = _VD;
int n = GetNumVectorElements(sz);
u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
if (sz == 4 && IsVectorColumn(vd)) {
GetVectorRegsPrefixD(dregs, sz, vd);
if (sz == V_Quad && IsConsecutive4(dregs)) {
ir.Write(IROp::Vec4Init, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
} else {
for (int i = 0; i < n; i++) {
ir.Write(IROp::SetConstF, dregs[i], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
}
}
ApplyPrefixD(dregs, sz);
}
void IRFrontend::Comp_VIdt(MIPSOpcode op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
DISABLE;
}
int vd = _VD;
VectorSize sz = GetVecSize(op);
if (sz != V_Quad)
DISABLE;
if (!IsVectorColumn(vd))
DISABLE;
u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
int row = vd & 3;
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
ir.Write(IROp::Vec4Init, dregs[0], (int)init);
GetVectorRegsPrefixD(dregs, sz, vd);
if (sz == 4 && IsConsecutive4(dregs)) {
int row = vd & 3;
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
ir.Write(IROp::Vec4Init, dregs[0], (int)init);
} else {
switch (sz) {
case V_Pair:
ir.Write(IROp::SetConstF, dregs[0], ir.AddConstantFloat((vd & 1) == 0 ? 1.0f : 0.0f));
ir.Write(IROp::SetConstF, dregs[1], ir.AddConstantFloat((vd & 1) == 1 ? 1.0f : 0.0f));
break;
case V_Quad:
ir.Write(IROp::SetConstF, dregs[0], ir.AddConstantFloat((vd & 3) == 0 ? 1.0f : 0.0f));
ir.Write(IROp::SetConstF, dregs[1], ir.AddConstantFloat((vd & 3) == 1 ? 1.0f : 0.0f));
ir.Write(IROp::SetConstF, dregs[2], ir.AddConstantFloat((vd & 3) == 2 ? 1.0f : 0.0f));
ir.Write(IROp::SetConstF, dregs[3], ir.AddConstantFloat((vd & 3) == 3 ? 1.0f : 0.0f));
break;
default:
DISABLE;
}
}
ApplyPrefixD(dregs, sz);
}
void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
CONDITIONAL_DISABLE;
MatrixSize sz = GetMtxSize(op);
if (sz != M_4x4) {
DISABLE;
@ -616,6 +637,7 @@ namespace MIPSComp {
}
void IRFrontend::Comp_VV2Op(MIPSOpcode op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;
@ -788,7 +810,7 @@ namespace MIPSComp {
switch ((op >> 21) & 0x1f) {
case 3: //mfv / mfvc
// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.
if (rt != 0) {
if (rt != MIPS_REG_ZERO) {
if (imm < 128) { //R(rt) = VI(imm);
ir.Write(IROp::FMovToGPR, rt, vfpuBase + voffset[imm]);
} else {
@ -1076,25 +1098,25 @@ namespace MIPSComp {
GetVectorRegs(tregs, sz, _VT);
GetVectorRegs(dregs, sz, _VD);
// SIMD-optimized implementations
if (msz == M_4x4 && IsConsecutive4(tregs) && IsConsecutive4(dregs)) {
// SIMD-optimized implementations - if sregs[0..3] is consecutive, the rest are too.
if (msz == M_4x4 && IsConsecutive4(sregs) && IsConsecutive4(dregs)) {
int s0 = IRVTEMP_0;
int s1 = IRVTEMP_PFX_T;
if (!IsConsecutive4(sregs)) {
if (!IsConsecutive4(tregs)) {
ir.Write(IROp::Vec4Scale, s0, sregs[0], tregs[0]);
for (int i = 1; i < 4; i++) {
if (!homogenous || (i != n - 1)) {
ir.Write(IROp::Vec4Scale, s1, sregs[i], tregs[i]);
ir.Write(IROp::Vec4Scale, s1, sregs[i * 4], tregs[i]);
ir.Write(IROp::Vec4Add, s0, s0, s1);
} else {
ir.Write(IROp::Vec4Add, s0, s0, sregs[i]);
ir.Write(IROp::Vec4Add, s0, s0, sregs[i * 4]);
}
}
ir.Write(IROp::Vec4Mov, dregs[0], s0);
return;
} else if (!homogenous) {
for (int i = 0; i < 4; i++) {
ir.Write(IROp::Vec4Dot, s0 + i, sregs[i], tregs[0]);
ir.Write(IROp::Vec4Dot, s0 + i, sregs[i * 4], tregs[0]);
}
ir.Write(IROp::Vec4Mov, dregs[0], s0);
return;

View File

@ -143,7 +143,7 @@ void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) {
} else {
ApplyRoundingMode();
ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8);
ir.Write(IROp::ExitToReg, MIPS_REG_RA, 0, 0);
ir.Write(IROp::ExitToReg, 0, MIPS_REG_RA, 0);
js.compiling = false;
}
} else {
@ -233,6 +233,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
if (true) {
static const IRPassFunc passes[] = {
&PropagateConstants,
&PurgeTemps,
};
if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified))
logBlocks = 1;

View File

@ -33,8 +33,8 @@ static const IRMeta irMeta[] = {
{ IROp::SltU, "SltU", "GGG" },
{ IROp::SltUConst, "SltUConst", "GGC" },
{ IROp::Clz, "Clz", "GG" },
{ IROp::MovZ, "MovZ", "GGG" },
{ IROp::MovNZ, "MovNZ", "GGG" },
{ IROp::MovZ, "MovZ", "GGG", IRFLAG_SRC3DST },
{ IROp::MovNZ, "MovNZ", "GGG", IRFLAG_SRC3DST },
{ IROp::Max, "Max", "GGG" },
{ IROp::Min, "Min", "GGG" },
{ IROp::BSwap16, "BSwap16", "GG" },
@ -60,11 +60,11 @@ static const IRMeta irMeta[] = {
{ IROp::Load32, "Load32", "GGC" },
{ IROp::LoadFloat, "LoadFloat", "FGC" },
{ IROp::LoadVec4, "LoadVec4", "FGC" },
{ IROp::Store8, "Store8", "GGC" },
{ IROp::Store16, "Store16", "GGC" },
{ IROp::Store32, "Store32", "GGC" },
{ IROp::StoreFloat, "StoreFloat", "FGC" },
{ IROp::StoreVec4, "StoreVec4", "FGC" },
{ IROp::Store8, "Store8", "GGC", IRFLAG_SRC3 },
{ IROp::Store16, "Store16", "GGC", IRFLAG_SRC3 },
{ IROp::Store32, "Store32", "GGC", IRFLAG_SRC3 },
{ IROp::StoreFloat, "StoreFloat", "FGC", IRFLAG_SRC3 },
{ IROp::StoreVec4, "StoreVec4", "FGC", IRFLAG_SRC3 },
{ IROp::FAdd, "FAdd", "FFF" },
{ IROp::FSub, "FSub", "FFF" },
{ IROp::FMul, "FMul", "FFF" },
@ -112,19 +112,19 @@ static const IRMeta irMeta[] = {
{ IROp::Interpret, "Interpret", "_C" },
{ IROp::Downcount, "Downcount", "_II" },
{ IROp::ExitToConst, "Exit", "C" },
{ IROp::ExitToConstIfEq, "ExitIfEq", "CGG" },
{ IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG" },
{ IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG" },
{ IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG" },
{ IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG" },
{ IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG" },
{ IROp::ExitToReg, "ExitToReg", "G" },
{ IROp::Syscall, "Syscall", "_C" },
{ IROp::Break, "Break", ""},
{ IROp::ExitToConst, "Exit", "C", IRFLAG_EXIT },
{ IROp::ExitToConstIfEq, "ExitIfEq", "CGG", IRFLAG_EXIT },
{ IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG", IRFLAG_EXIT },
{ IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG", IRFLAG_EXIT },
{ IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG", IRFLAG_EXIT },
{ IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG", IRFLAG_EXIT },
{ IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG", IRFLAG_EXIT },
{ IROp::ExitToReg, "ExitToReg", "_G", IRFLAG_EXIT },
{ IROp::Syscall, "Syscall", "_C", IRFLAG_EXIT },
{ IROp::Break, "Break", "", IRFLAG_EXIT},
{ IROp::SetPC, "SetPC", "_G" },
{ IROp::SetPCConst, "SetPC", "_C" },
{ IROp::CallReplacement, "CallRepl", "_C"},
{ IROp::CallReplacement, "CallRepl", "_C" },
};
const IRMeta *metaIndex[256];

View File

@ -281,6 +281,15 @@ enum {
IRREG_FPCOND = 245,
};
// Per-opcode behavior bits stored in the IR metadata table.
// Each flag occupies its own bit so they can be tested with a mask.
enum IRFlags {
	// The first operand slot is read as a third source; nothing is written to it.
	IRFLAG_SRC3 = 1 << 0,
	// The first operand slot is both read (as src3) and written (as dest),
	// i.e. the instruction mutates its destination.
	IRFLAG_SRC3DST = 1 << 1,
	// The instruction leaves the block; the exit may be conditional.
	IRFLAG_EXIT = 1 << 2,
};
struct IRMeta {
IROp op;
const char *name;

View File

@ -208,7 +208,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
#else
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2];
#endif
break;
@ -226,6 +226,12 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
case VC_TR: result = 1; break;
case VC_FL: result = 0; break;
default:
@ -523,7 +529,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
return constPool[inst->dest];
case IROp::ExitToReg:
return mips->r[inst->dest];
return mips->r[inst->src1];
case IROp::ExitToConstIfEq:
if (mips->r[inst->src1] == mips->r[inst->src2])

View File

@ -369,10 +369,12 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
break;
case IROp::Vec4Init:
case IROp::Vec4Mov:
case IROp::Vec4Add:
case IROp::Vec4Sub:
case IROp::Vec4Mul:
case IROp::Vec4Div:
case IROp::Vec4Dot:
case IROp::Vec4Scale:
case IROp::Vec4Shuffle:
out.Write(inst);
@ -392,6 +394,8 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1);
goto doDefault;
case IROp::CallReplacement:
case IROp::Break:
case IROp::Syscall:
case IROp::Interpret:
case IROp::ExitToConst:
@ -433,3 +437,176 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
}
return logBlocks;
}
bool IRReadsFromGPR(const IRInst &inst, int reg) {
const IRMeta *m = GetIRMeta(inst.op);
if (m->types[1] == 'G' && inst.src1 == reg) {
return true;
}
if (m->types[2] == 'G' && inst.src2 == reg) {
return true;
}
if ((m->flags & (IRFLAG_SRC3 | IRFLAG_SRC3DST)) != 0 && m->types[0] == 'G' && inst.src3 == reg) {
return true;
}
if (inst.op == IROp::Interpret || inst.op == IROp::CallReplacement) {
return true;
}
return false;
}
int IRDestGPR(const IRInst &inst) {
const IRMeta *m = GetIRMeta(inst.op);
if ((m->flags & IRFLAG_SRC3) == 0 && m->types[0] == 'G') {
return inst.dest;
}
return -1;
}
bool PurgeTemps(const IRWriter &in, IRWriter &out) {
std::vector<IRInst> insts;
insts.reserve(in.GetInstructions().size());
struct Check {
Check(int r, int i, bool rbx) : reg(r), index(i), readByExit(rbx) {
}
int reg;
int index;
bool readByExit;
};
std::vector<Check> checks;
bool logBlocks = false;
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
const IRInst &inst = in.GetInstructions()[i];
const IRMeta *m = GetIRMeta(inst.op);
for (Check &check : checks) {
if (check.reg == 0) {
continue;
}
if (IRReadsFromGPR(inst, check.reg)) {
// Read from, so we can't optimize out.
check.reg = 0;
} else if (check.readByExit && (m->flags & IRFLAG_EXIT) != 0) {
check.reg = 0;
} else if (IRDestGPR(inst) == check.reg) {
// Clobbered, we can optimize out.
// This happens sometimes with temporaries used for constant addresses.
insts[check.index].op = IROp::Mov;
insts[check.index].dest = 0;
insts[check.index].src1 = 0;
check.reg = 0;
}
}
int dest = IRDestGPR(inst);
switch (dest) {
case IRTEMP_0:
case IRTEMP_1:
case IRTEMP_LHS:
case IRTEMP_RHS:
// Unlike other ops, these don't need to persist between blocks.
// So we consider them not read unless proven read.
checks.push_back(Check(dest, i, false));
break;
default:
if (dest > IRTEMP_RHS) {
// These might sometimes be implicitly read/written by other instructions.
break;
}
checks.push_back(Check(dest, i, true));
break;
// Not a GPR output.
case 0:
case -1:
break;
}
// TODO: VFPU temps? Especially for masked dregs.
insts.push_back(inst);
}
for (Check &check : checks) {
if (!check.readByExit && check.reg > 0) {
insts[check.index].op = IROp::Mov;
insts[check.index].dest = 0;
insts[check.index].src1 = 0;
}
}
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
for (const IRInst &inst : insts) {
if (inst.op != IROp::Mov || inst.dest != 0 || inst.src1 != 0) {
out.Write(inst);
}
}
return logBlocks;
}
bool ReduceLoads(const IRWriter &in, IRWriter &out) {
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;
bool logBlocks = false;
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];
if (inst.op == IROp::Load32 || inst.op == IROp::Load16 || inst.op == IROp::Load16Ext) {
int dest = IRDestGPR(inst);
for (int j = i + 1; j < n; j++) {
const IRInst &laterInst = in.GetInstructions()[j];
const IRMeta *m = GetIRMeta(laterInst.op);
if ((m->flags & IRFLAG_EXIT) != 0) {
// Exit, so we can't do the optimization.
break;
}
if (IRReadsFromGPR(laterInst, dest)) {
if (IRDestGPR(laterInst) == dest && laterInst.op == IROp::AndConst) {
const u32 mask = in.GetConstants()[laterInst.src2];
// Here we are, maybe we can reduce the load size based on the mask.
if ((mask & 0xffffff00) == 0) {
inst.op = IROp::Load8;
if (mask == 0xff) {
nextSkip = j;
}
} else if ((mask & 0xffff0000) == 0 && inst.op == IROp::Load32) {
inst.op = IROp::Load16;
if (mask == 0xffff) {
nextSkip = j;
}
}
}
// If it was read, we can't do the optimization.
break;
}
if (IRDestGPR(laterInst) == dest) {
// Someone else wrote, so we can't do the optimization.
break;
}
}
}
if (i != nextSkip) {
out.Write(inst);
}
}
return logBlocks;
}

View File

@ -6,3 +6,5 @@ typedef bool (*IRPassFunc)(const IRWriter &in, IRWriter &out);
bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out);
bool PropagateConstants(const IRWriter &in, IRWriter &out);
bool PurgeTemps(const IRWriter &in, IRWriter &out);
bool ReduceLoads(const IRWriter &in, IRWriter &out);