irjit: Add a pass to keep Vec4s in Vec4s.

This commit is contained in:
Unknown W. Brackets 2023-08-27 11:47:02 -07:00
parent f698623645
commit 5f84887dea
3 changed files with 218 additions and 0 deletions

View File

@ -281,6 +281,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
&OptimizeFPMoves,
&PropagateConstants,
&PurgeTemps,
&ReduceVec4Flush,
// &ReorderLoadStore,
// &MergeLoadStore,
// &ThreeOpToTwoOp,

View File

@ -6,6 +6,7 @@
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Log.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "Core/MIPS/IR/IRAnalysis.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/IR/IRPassSimplify.h"
@ -1874,3 +1875,218 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
}
return logBlocks;
}
bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts) {
CONDITIONAL_DISABLE;
// Only do this when using a SIMD backend.
if (!opts.preferVec4) {
DISABLE;
}
bool isVec4[256]{};
bool isUsed[256]{};
bool isVec4Dirty[256]{};
auto updateVec4 = [&](char type, IRReg r) {
bool downgraded = false;
switch (type) {
case 'F':
downgraded = isVec4[r & ~3];
isVec4[r & ~3] = false;
isUsed[r] = true;
break;
case 'V':
_dbg_assert_((r & 3) == 0);
isVec4[r] = true;
for (int i = 0; i < 4; ++i)
isUsed[r + i] = true;
break;
case '2':
downgraded = isVec4[r & ~3];
isVec4[r & ~3] = false;
for (int i = 0; i < 2; ++i)
isUsed[r + i] = true;
break;
default:
break;
}
return downgraded;
};
auto updateVec4Dest = [&](char type, IRReg r, uint32_t flags) {
if ((flags & IRFLAG_SRC3) == 0) {
switch (type) {
case 'F':
isVec4Dirty[r & ~3] = false;
break;
case 'V':
_dbg_assert_((r & 3) == 0);
isVec4Dirty[r] = true;
break;
case '2':
isVec4Dirty[r & ~3] = false;
break;
default:
break;
}
}
return updateVec4(type, r);
};
// Checks overlap from r1 to other params.
auto overlapped = [](IRReg r1, int l1, IRReg r2, int l2, IRReg r3 = IRREG_INVALID, int l3 = 0) {
if (r1 < r2 + l2 && r1 + l1 > r2)
return true;
if (r1 < r3 + l3 && r1 + l1 > r3)
return true;
return false;
};
bool logBlocks = false;
int inCount = (int)in.GetInstructions().size();
for (int i = 0; i < inCount; ++i) {
IRInst inst = in.GetInstructions()[i];
const IRMeta *m = GetIRMeta(inst.op);
if ((m->flags & (IRFLAG_EXIT | IRFLAG_BARRIER)) != 0) {
memset(isVec4, 0, sizeof(isVec4));
out.Write(inst);
continue;
}
IRReg temp = IRREG_INVALID;
auto findAvailTempVec4 = [&]() {
// If it's not used yet in this block, we can use it.
// Note: even if the instruction uses it to write, that should be fine.
for (IRReg r = IRVTEMP_PFX_S; r < IRVTEMP_0 + 4; r += 4) {
if (isUsed[r])
continue;
bool usable = true;
for (int j = 1; j < 4; ++j)
usable = usable && !isUsed[r + j];
if (usable) {
temp = r;
// We don't update isUsed because our temporary doesn't need to last.
return true;
}
}
return false;
};
switch (inst.op) {
case IROp::SetConstF:
if (isVec4[inst.dest & ~3] && findAvailTempVec4()) {
// Check if we're setting multiple in a row, this is a bit common.
u8 blendMask = 1 << (inst.dest & 3);
while (i + 1 < inCount) {
IRInst next = in.GetInstructions()[i + 1];
if (next.op != IROp::SetConstF || (next.dest & ~3) != (inst.dest & ~3))
break;
if (next.constant != inst.constant)
break;
blendMask |= 1 << (next.dest & 3);
i++;
}
if (inst.constant == 0) {
out.Write(IROp::Vec4Init, temp, (int)Vec4Init::AllZERO);
} else if (inst.constant == 0x3F800000) {
out.Write(IROp::Vec4Init, temp, (int)Vec4Init::AllONE);
} else if (inst.constant == 0xBF800000) {
out.Write(IROp::Vec4Init, temp, (int)Vec4Init::AllMinusONE);
} else {
out.Write(IROp::SetConstF, temp, out.AddConstant(inst.constant));
out.Write(IROp::Vec4Shuffle, temp, temp, 0);
}
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, temp, blendMask);
isVec4Dirty[inst.dest & ~3] = true;
continue;
}
break;
case IROp::FMovFromGPR:
if (isVec4[inst.dest & ~3] && findAvailTempVec4()) {
u8 blendMask = 1 << (inst.dest & 3);
out.Write(IROp::FMovFromGPR, temp, inst.src1);
out.Write(IROp::Vec4Shuffle, temp, temp, 0);
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, temp, blendMask);
isVec4Dirty[inst.dest & ~3] = true;
continue;
}
break;
case IROp::LoadFloat:
if (isVec4[inst.dest & ~3] && isVec4Dirty[inst.dest & ~3] && findAvailTempVec4()) {
u8 blendMask = 1 << (inst.dest & 3);
out.Write(inst.op, temp, inst.src1, inst.src2, inst.constant);
out.Write(IROp::Vec4Shuffle, temp, temp, 0);
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, temp, blendMask);
isVec4Dirty[inst.dest & ~3] = true;
continue;
}
break;
case IROp::FMov:
if (isVec4[inst.dest & ~3] && (inst.dest & ~3) == (inst.src1 & ~3)) {
// Oh, actually a shuffle?
uint8_t shuffle = (uint8_t)VFPU_SWIZZLE(0, 1, 2, 3);
uint8_t destShift = (inst.dest & 3) * 2;
shuffle = (shuffle & ~(3 << destShift)) | ((inst.src1 & 3) << destShift);
out.Write(IROp::Vec4Shuffle, inst.dest & ~3, inst.dest & ~3, shuffle);
isVec4Dirty[inst.dest & ~3] = true;
continue;
} else if (isVec4[inst.dest & ~3] && isVec4[inst.src1 & ~3] && (inst.dest & 3) == (inst.src1 & 3)) {
// We can turn this directly into a blend, since it's the same lane.
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, inst.src1 & ~3, 1 << (inst.dest & 3));
isVec4Dirty[inst.dest & ~3] = true;
continue;
} else if (isVec4[inst.dest & ~3] && isVec4[inst.src1 & ~3] && findAvailTempVec4()) {
// For this, we'll need a temporary to move to the right lane.
uint8_t shuffle = (uint8_t)VFPU_SWIZZLE(inst.src1 & 3, inst.src1 & 3, inst.src1 & 3, inst.src1 & 3);
out.Write(IROp::Vec4Shuffle, temp, inst.src1 & ~3, shuffle);
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, temp, 1 << (inst.dest & 3));
isVec4Dirty[inst.dest & ~3] = true;
continue;
}
break;
case IROp::FAdd:
case IROp::FSub:
case IROp::FMul:
case IROp::FDiv:
if (isVec4[inst.dest & ~3] && isVec4Dirty[inst.dest & ~3] && findAvailTempVec4()) {
u8 blendMask = 1 << (inst.dest & 3);
out.Write(inst.op, temp, inst.src1, inst.src2);
out.Write(IROp::Vec4Shuffle, temp, temp, 0);
out.Write(IROp::Vec4Blend, inst.dest & ~3, inst.dest & ~3, temp, blendMask);
isVec4Dirty[inst.dest & ~3] = true;
continue;
}
break;
}
bool downgrade = false;
if (updateVec4Dest(m->types[0], inst.dest, m->flags))
downgrade = true;
if (updateVec4(m->types[1], inst.src1))
downgrade = true;
if (updateVec4(m->types[2], inst.src2))
downgrade = true;
if (downgrade) {
//WARN_LOG(JIT, "Vec4 downgrade by: %s", m->name);
}
out.Write(inst);
}
return logBlocks;
}

View File

@ -15,3 +15,4 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts);
// Rewrites single-lane float writes into register groups tracked as Vec4s so they use
// Vec4Init / Vec4Shuffle / Vec4Blend instead.  Only rewrites when opts.preferVec4 is set.
bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts);