jit-ir: Add load/store reorder and merge passes.

Can do more in merge, potentially.  Maybe it's not useful...
This commit is contained in:
Unknown W. Brackets 2016-05-17 21:24:13 -07:00
parent a9cdf7651e
commit 5534fba72c
3 changed files with 366 additions and 0 deletions

View File

@ -236,6 +236,8 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, std::v
&OptimizeFPMoves,
&PropagateConstants,
&PurgeTemps,
// &ReorderLoadStore,
// &MergeLoadStore,
// &ThreeOpToTwoOp,
};
if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified))

View File

@ -1,4 +1,5 @@
#include <utility>
#include <algorithm>
#include "Common/Log.h"
#include "Core/MIPS/IR/IRInterpreter.h"
@ -744,3 +745,364 @@ bool ReduceLoads(const IRWriter &in, IRWriter &out) {
return logBlocks;
}
static std::vector<IRInst> ReorderLoadStoreOps(std::vector<IRInst> &ops, const u32 *consts) {
if (ops.size() < 2) {
return ops;
}
bool modifiedRegs[256] = {};
for (size_t i = 0, n = ops.size(); i < n - 1; ++i) {
bool modifiesReg = false;
bool usesFloatReg = false;
switch (ops[i].op) {
case IROp::Load8:
case IROp::Load8Ext:
case IROp::Load16:
case IROp::Load16Ext:
case IROp::Load32:
modifiesReg = true;
if (ops[i].src1 == ops[i].dest) {
// Can't ever reorder these, since it changes.
continue;
}
break;
case IROp::Store8:
case IROp::Store16:
case IROp::Store32:
break;
case IROp::LoadFloat:
case IROp::LoadVec4:
usesFloatReg = true;
modifiesReg = true;
break;
case IROp::StoreFloat:
case IROp::StoreVec4:
usesFloatReg = true;
break;
default:
continue;
}
memset(modifiedRegs, 0, sizeof(modifiedRegs));
size_t start = i;
size_t j;
for (j = i; j < n; ++j) {
if (ops[start].op != ops[j].op || ops[start].src1 != ops[j].src1) {
// Incompatible ops, so let's not reorder.
break;
}
if (modifiedRegs[ops[j].dest] || (!usesFloatReg && modifiedRegs[ops[j].src1])) {
// Can't reorder, this reg was modified.
break;
}
if (modifiesReg) {
// Modifies itself, can't reorder this.
if (!usesFloatReg && ops[j].dest == ops[j].src1) {
break;
}
modifiedRegs[ops[j].dest] = true;
}
// Keep going, these operations are compatible.
}
// Everything up to (but not including) j will be sorted, so skip them.
i = j - 1;
size_t end = j;
if (start + 1 < end) {
std::stable_sort(ops.begin() + start, ops.begin() + end, [&](const IRInst &a, const IRInst &b) {
return consts[a.src2] < consts[b.src2];
});
}
}
return ops;
}
bool ReorderLoadStore(const IRWriter &in, IRWriter &out) {
bool logBlocks = false;
enum class RegState : u8 {
UNUSED = 0,
READ = 1,
CHANGED = 2,
};
bool queuing = false;
std::vector<IRInst> loadStoreQueue;
std::vector<IRInst> otherQueue;
RegState otherRegs[256] = {};
auto flushQueue = [&]() {
if (!queuing) {
return;
}
std::vector<IRInst> loadStoreUnsorted = loadStoreQueue;
std::vector<IRInst> loadStoreSorted = ReorderLoadStoreOps(loadStoreQueue, &in.GetConstants()[0]);
if (memcmp(&loadStoreSorted[0], &loadStoreUnsorted[0], sizeof(IRInst) * loadStoreSorted.size()) != 0) {
logBlocks = true;
}
queuing = false;
for (IRInst queued : loadStoreSorted) {
out.Write(queued);
}
for (IRInst queued : otherQueue) {
out.Write(queued);
}
loadStoreQueue.clear();
otherQueue.clear();
memset(otherRegs, 0, sizeof(otherRegs));
};
for (int i = 0; i < (int)in.GetInstructions().size(); i++) {
IRInst inst = in.GetInstructions()[i];
switch (inst.op) {
case IROp::Load8:
case IROp::Load8Ext:
case IROp::Load16:
case IROp::Load16Ext:
case IROp::Load32:
// To move a load up, its dest can't be changed by things we move down.
if (otherRegs[inst.dest] != RegState::UNUSED || otherRegs[inst.src1] == RegState::CHANGED) {
flushQueue();
}
queuing = true;
loadStoreQueue.push_back(inst);
break;
case IROp::Store8:
case IROp::Store16:
case IROp::Store32:
// A store can move above even if it's read, as long as it's not changed by the other ops.
if (otherRegs[inst.src3] == RegState::CHANGED || otherRegs[inst.src1] == RegState::CHANGED) {
flushQueue();
}
queuing = true;
loadStoreQueue.push_back(inst);
break;
case IROp::LoadVec4:
case IROp::LoadFloat:
case IROp::StoreVec4:
case IROp::StoreFloat:
// Floats can always move as long as their address is safe.
if (otherRegs[inst.src1] == RegState::CHANGED) {
flushQueue();
}
queuing = true;
loadStoreQueue.push_back(inst);
break;
case IROp::Sub:
case IROp::Slt:
case IROp::SltU:
case IROp::Add:
case IROp::And:
case IROp::Or:
case IROp::Xor:
case IROp::Shl:
case IROp::Shr:
case IROp::Ror:
case IROp::Sar:
case IROp::MovZ:
case IROp::MovNZ:
case IROp::Max:
case IROp::Min:
// We'll try to move this downward.
otherRegs[inst.dest] = RegState::CHANGED;
if (inst.src1 && otherRegs[inst.src1] != RegState::CHANGED)
otherRegs[inst.src1] = RegState::READ;
if (inst.src2 && otherRegs[inst.src2] != RegState::CHANGED)
otherRegs[inst.src2] = RegState::READ;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::Neg:
case IROp::Not:
case IROp::BSwap16:
case IROp::BSwap32:
case IROp::Ext8to32:
case IROp::Ext16to32:
case IROp::ReverseBits:
case IROp::Clz:
case IROp::AddConst:
case IROp::SubConst:
case IROp::AndConst:
case IROp::OrConst:
case IROp::XorConst:
case IROp::SltConst:
case IROp::SltUConst:
case IROp::ShlImm:
case IROp::ShrImm:
case IROp::RorImm:
case IROp::SarImm:
case IROp::Mov:
// We'll try to move this downward.
otherRegs[inst.dest] = RegState::CHANGED;
if (inst.src1 && otherRegs[inst.src1] != RegState::CHANGED)
otherRegs[inst.src1] = RegState::READ;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::SetConst:
// We'll try to move this downward.
otherRegs[inst.dest] = RegState::CHANGED;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::Mult:
case IROp::MultU:
case IROp::Madd:
case IROp::MaddU:
case IROp::Msub:
case IROp::MsubU:
case IROp::Div:
case IROp::DivU:
if (inst.src1 && otherRegs[inst.src1] != RegState::CHANGED)
otherRegs[inst.src1] = RegState::READ;
if (inst.src2 && otherRegs[inst.src2] != RegState::CHANGED)
otherRegs[inst.src2] = RegState::READ;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::MfHi:
case IROp::MfLo:
case IROp::FpCondToReg:
otherRegs[inst.dest] = RegState::CHANGED;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::MtHi:
case IROp::MtLo:
if (inst.src1 && otherRegs[inst.src1] != RegState::CHANGED)
otherRegs[inst.src1] = RegState::READ;
otherQueue.push_back(inst);
queuing = true;
break;
case IROp::Nop:
case IROp::Downcount:
case IROp::ZeroFpCond:
if (queuing) {
// These are freebies. Sometimes helps with delay slots.
otherQueue.push_back(inst);
} else {
out.Write(inst);
}
break;
default:
flushQueue();
out.Write(inst);
break;
}
}
// Can reuse the old constants array - not touching constants in this pass.
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
return logBlocks;
}
bool MergeLoadStore(const IRWriter &in, IRWriter &out) {
bool logBlocks = false;
auto opsCompatible = [&](const IRInst &a, const IRInst &b, int dist) {
if (a.op != b.op || a.src1 != b.src1) {
// Not similar enough at all.
return false;
}
u32 off1 = in.GetConstants()[a.src2];
u32 off2 = in.GetConstants()[b.src2];
if (off1 + dist != off2) {
// Not immediately sequential.
return false;
}
return true;
};
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];
int c = 0;
switch (inst.op) {
case IROp::Store8:
for (c = 1; c < 4 && i + c < n; ++c) {
const IRInst &nextInst = in.GetInstructions()[i + c];
// TODO: Might be nice to check if this is an obvious constant.
if (inst.src3 != nextInst.src3 || inst.src3 != 0) {
break;
}
if (!opsCompatible(inst, nextInst, c)) {
break;
}
}
// Warning: this may generate unaligned stores.
if (c == 2 || c == 3) {
inst.op = IROp::Store16;
out.Write(inst);
// Skip the next one.
++i;
continue;
}
if (c == 4) {
inst.op = IROp::Store32;
out.Write(inst);
// Skip all 4.
i += 3;
continue;
}
out.Write(inst);
break;
case IROp::Store16:
for (c = 1; c < 2 && i + c < n; ++c) {
const IRInst &nextInst = in.GetInstructions()[i + c];
// TODO: Might be nice to check if this is an obvious constant.
if (inst.src3 != nextInst.src3 || inst.src3 != 0) {
break;
}
if (!opsCompatible(inst, nextInst, c * 2)) {
break;
}
}
// Warning: this may generate unaligned stores.
if (c == 2) {
inst.op = IROp::Store32;
out.Write(inst);
// Skip the next one.
++i;
continue;
}
out.Write(inst);
break;
default:
out.Write(inst);
break;
}
}
// Can reuse the old constants array - not touching constants in this pass.
for (u32 value : in.GetConstants()) {
out.AddConstant(value);
}
return logBlocks;
}

View File

@ -11,3 +11,5 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out);
bool ReduceLoads(const IRWriter &in, IRWriter &out);
bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out);
bool OptimizeFPMoves(const IRWriter &in, IRWriter &out);
// Moves loads/stores ahead of simple integer ops where register usage allows,
// and sorts adjacent same-op, same-base accesses by offset.  Returns true when
// a group of loads/stores was actually reordered.
bool ReorderLoadStore(const IRWriter &in, IRWriter &out);
// Merges sequential Store8/Store16 ops that store register 0 into wider stores
// (may produce unaligned stores).  Currently always returns false.
bool MergeLoadStore(const IRWriter &in, IRWriter &out);