IR: Some more VFPU

This commit is contained in:
Henrik Rydgard 2016-05-10 21:50:08 +02:00
parent 558bb197c7
commit 45efcda6b1
8 changed files with 195 additions and 14 deletions

View File

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <cmath>
#include "math/math_util.h"
#include "Core/MemMap.h"
@ -57,7 +58,9 @@ namespace MIPSComp {
}
// Returns true when the four register indices form an ascending run, i.e.
// each entry is exactly one greater than the entry before it.
static bool IsConsecutive4(const u8 regs[4]) {
	return regs[1] == regs[0] + 1 &&
		regs[2] == regs[1] + 1 &&
		regs[3] == regs[2] + 1;
}
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
@ -244,15 +247,79 @@ namespace MIPSComp {
}
// Emits IR for a VFPU vector-init instruction.
// Bits 16..19 of the opcode are read as the init type: 6 selects zeros, and
// every other value is treated as ones by the code below.
// NOTE(review): DISABLE is defined outside this view (presumably a bail-out
// that returns). If so, the unconditional DISABLE on the first body line makes
// everything after it unreachable - confirm whether it is meant to stay.
void IRFrontend::Comp_VVectorInit(MIPSOpcode op) {
DISABLE;
// Only the unprefixed form is handled here.
if (!js.HasNoPrefix())
DISABLE;
VectorSize sz = GetVecSize(op);
int type = (op >> 16) & 0xF;
int vd = _VD;
// NOTE(review): sz is compared against the literals 4 and 1 here, while
// Comp_VIdt compares against V_Quad - confirm the enum values match.
if (sz == 4 && IsVectorColumn(vd)) {
u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
// A four-wide column is initialised with a single InitVec4 addressed by its first register.
ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
} else if (sz == 1) {
// A single register is written as a float constant.
ir.Write(IROp::SetConstV, voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
} else {
// Other sizes, and four-wide non-column vectors, are not handled.
DISABLE;
}
}
// Emits IR for the VFPU vidt instruction: writes a vector that holds a single
// 1.0 with the remaining lanes 0.0, using one InitVec4.
// NOTE(review): DISABLE is defined outside this view (presumably a bail-out
// that returns). If so, the unconditional DISABLE on the first body line makes
// everything after it unreachable - confirm whether it is meant to stay.
void IRFrontend::Comp_VIdt(MIPSOpcode op) {
DISABLE;
// Only the unprefixed form is handled here.
if (!js.HasNoPrefix())
DISABLE;
int vd = _VD;
VectorSize sz = GetVecSize(op);
// Only four-wide column vectors are handled.
if (sz != V_Quad)
DISABLE;
if (!IsVectorColumn(vd))
DISABLE;
u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
// The low two bits of vd choose which lane receives the 1.0; this relies on
// Set_1000..Set_0001 being consecutive enumerators of Vec4Init.
int row = vd & 3;
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)init);
}
// Emits IR for a VFPU matrix-init instruction on a 4x4 matrix, as one InitVec4
// per column. Bits 16..19 of the opcode select the pattern: 3 = identity,
// 6 = all zeros, 7 = all ones.
// NOTE(review): DISABLE is defined outside this view (presumably a bail-out
// that returns). If so, the unconditional DISABLE on the first body line makes
// everything after it unreachable - confirm whether it is meant to stay.
void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
DISABLE;
MatrixSize sz = GetMtxSize(op);
// Only 4x4 matrices are handled.
if (sz != M_4x4) {
DISABLE;
}
// Not really about trying here, it will work if enabled.
VectorSize vsz = GetVectorSize(sz);
u8 vecs[4];
int vd = _VD;
if (IsMatrixTransposed(vd)) {
// All outputs are transpositionally symmetric, so should be fine.
vd = TransposeMatrixReg(vd);
}
GetMatrixColumns(vd, M_4x4, vecs);
for (int i = 0; i < 4; i++) {
u8 vec[4];
GetVectorRegs(vec, vsz, vecs[i]);
// As they are columns, they will be nicely consecutive.
Vec4Init init;
switch ((op >> 16) & 0xF) {
case 3:
// Identity: column i gets its 1.0 in lane i.
init = Vec4Init((int)Vec4Init::Set_1000 + i);
break;
case 6:
init = Vec4Init::AllZERO;
break;
case 7:
init = Vec4Init::AllONE;
break;
default:
// NOTE(review): this returns without writing any IR for the instruction,
// unlike the DISABLE used for other unsupported cases - confirm intent.
return;
}
ir.Write(IROp::InitVec4, voffset[vec[0]], (int)init);
}
return;
}
void IRFrontend::Comp_VHdp(MIPSOpcode op) {
@ -275,7 +342,7 @@ namespace MIPSComp {
void IRFrontend::Comp_VV2Op(MIPSOpcode op) {
CONDITIONAL_DISABLE;
// Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure
// Eliminate silly no-op VMOVs, common in Wipeout Pure
if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) {
return;
}
@ -379,7 +446,12 @@ namespace MIPSComp {
}
// Emits IR for the VFPU viim instruction: takes the low 16 bits of the opcode
// as a signed integer, converts it to float and writes it as a constant to the
// register given by _VT.
// NOTE(review): DISABLE is defined outside this view (presumably a bail-out
// that returns). If so, the unconditional DISABLE on the first body line makes
// everything after it unreachable - confirm whether it is meant to stay.
void IRFrontend::Comp_Viim(MIPSOpcode op) {
DISABLE;
// Only the unprefixed form is handled here.
if (!js.HasNoPrefix())
DISABLE;
u8 dreg = _VT;
// The u16 -> s16 -> s32 cast chain sign-extends the 16-bit immediate.
s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
ir.Write(IROp::SetConstV, voffset[dreg], ir.AddConstantFloat((float)imm));
}
void IRFrontend::Comp_Vfim(MIPSOpcode op) {

View File

@ -85,9 +85,18 @@ static const IRMeta irMeta[] = {
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
{ IROp::VMovFromGPR, "VMovFromGPR", "VG" },
{ IROp::VMovToGPR, "VMovToGPR", "GV" },
{ IROp::InitVec4, "InitVec4", "Vv"},
{ IROp::FpCondToReg, "FpCondToReg", "G" },
{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
{ IROp::VSin, "VSin", "VV" },
{ IROp::VCos, "VCos", "VV" },
{ IROp::VSqrt, "VSqrt", "VV" },
{ IROp::VRSqrt, "VRSqrt", "VV" },
{ IROp::VRecip, "VRecip", "VV" },
{ IROp::VAsin, "VAsin", "VV" },
{ IROp::Interpret, "Interpret", "_C" },
{ IROp::Downcount, "Downcount", "_II" },
{ IROp::ExitToConst, "Exit", "C" },
@ -177,6 +186,15 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
"RCX6",
"RCX7",
};
static const char *initVec4Names[8] = {
"[0 0 0 0]",
"[1 1 1 1]",
"[-1 -1 -1 -1]",
"[1 0 0 0]",
"[0 1 0 0]",
"[0 0 1 0]",
"[0 0 0 1]",
};
switch (type) {
case 'G':
@ -197,6 +215,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
case 'T':
snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]);
break;
case 'v':
snprintf(buf, bufSize, "%s", initVec4Names[param]);
break;
case '_':
case '\0':
buf[0] = 0;

View File

@ -144,6 +144,16 @@ enum class IROp : u8 {
VMovFromGPR,
VMovToGPR,
InitVec4,
// Slow special functions. Used on singles.
VSin,
VCos,
VSqrt,
VRSqrt,
VRecip,
VAsin,
// Fake/System instructions
Interpret,
@ -181,6 +191,17 @@ enum IRComparison {
Bad,
};
// Preset four-float patterns that IROp::InitVec4 can write.
// The numeric values matter: they are stored in the IR as plain ints, used as
// row indices into the preset tables, and the Set_xxxx entries are reached by
// adding a lane index to Set_1000, so those four must stay consecutive.
enum class Vec4Init {
	AllZERO = 0,      // [ 0  0  0  0]
	AllONE = 1,       // [ 1  1  1  1]
	AllMinusONE = 2,  // [-1 -1 -1 -1]
	Set_1000 = 3,     // [ 1  0  0  0]
	Set_0100 = 4,     // [ 0  1  0  0]
	Set_0010 = 5,     // [ 0  0  1  0]
	Set_0001 = 6,     // [ 0  0  0  1]
};
// Hm, unused
inline IRComparison Invert(IRComparison comp) {
switch (comp) {

View File

@ -6,6 +6,7 @@
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "math/math_util.h"
#include "Common/CommonTypes.h"
@ -14,6 +15,16 @@
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/IR/IRInterpreter.h"
// Preset rows copied by IROp::InitVec4, indexed by the Vec4Init value carried
// in the instruction. Kept 16-byte aligned because the SSE path reads rows
// with an aligned load.
alignas(16) float vec4InitValues[8][4] = {
	{  0.0f,  0.0f,  0.0f,  0.0f },  // Vec4Init::AllZERO
	{  1.0f,  1.0f,  1.0f,  1.0f },  // Vec4Init::AllONE
	{ -1.0f, -1.0f, -1.0f, -1.0f },  // Vec4Init::AllMinusONE
	{  1.0f,  0.0f,  0.0f,  0.0f },  // Vec4Init::Set_1000
	{  0.0f,  1.0f,  0.0f,  0.0f },  // Vec4Init::Set_0100
	{  0.0f,  0.0f,  1.0f,  0.0f },  // Vec4Init::Set_0010
	{  0.0f,  0.0f,  0.0f,  1.0f },  // Vec4Init::Set_0001
	{  0.0f,  0.0f,  0.0f,  0.0f },  // Spare row with no enumerator; zero-filled.
};
u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) {
const IRInst *end = inst + count;
while (inst != end) {
@ -134,6 +145,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
break;
}
case IROp::InitVec4:
	// Copies the preset row selected by src1 into the four consecutive vector
	// registers that start at dest.
#if defined(_M_SSE)
	// NOTE(review): _mm_store_ps needs a 16-byte aligned destination - confirm
	// that mips->v and the dest offsets used for InitVec4 guarantee that.
	_mm_store_ps(&mips->v[inst->dest], _mm_load_ps(vec4InitValues[inst->src1]));
#else
	// Writes to the same address as the SSE path. The earlier "dest + i" used
	// an index that is not declared in this case.
	memcpy(&mips->v[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
#endif
	break;
case IROp::VSin:
mips->v[inst->dest] = vfpu_sin(mips->v[inst->src1]);
break;
case IROp::VCos:
mips->v[inst->dest] = vfpu_cos(mips->v[inst->src1]);
break;
case IROp::VSqrt:
mips->v[inst->dest] = sqrtf(mips->v[inst->src1]);
break;
case IROp::VRSqrt:
mips->v[inst->dest] = 1.0f / sqrtf(mips->v[inst->src1]);
break;
case IROp::VRecip:
mips->v[inst->dest] = 1.0f / mips->v[inst->src1];
break;
case IROp::VAsin:
mips->v[inst->dest] = vfpu_asin(mips->v[inst->src1]);
break;
case IROp::ShlImm:
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
break;

View File

@ -114,9 +114,9 @@ void IRJit::RunLoopUntil(u64 globalticks) {
}
while (mips_->downcount >= 0) {
u32 inst = Memory::ReadUnchecked_U32(mips_->pc);
u32 opcode = inst >> 24;
u32 data = inst & 0xFFFFFF;
if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) {
u32 opcode = inst & 0xFF000000;
if (opcode == MIPS_EMUHACK_OPCODE) {
u32 data = inst & 0xFFFFFF;
IRBlock *block = blocks_.GetBlock(data);
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions());
} else {

View File

@ -340,8 +340,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
// FP-only instructions don't need to flush immediates.
case IROp::FAdd:
case IROp::FMul:
case IROp::FDiv:
// Regularize, to help x86 backends (add.s r0, r1, r0 -> add.s r0, r0, r1)
if (inst.src2 == inst.dest && inst.src1 != inst.src2)
std::swap(inst.src1, inst.src2);
out.Write(inst);
break;
case IROp::FSub:
case IROp::FDiv:
case IROp::FNeg:
case IROp::FAbs:
case IROp::FSqrt:
@ -373,6 +378,19 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
}
break;
case IROp::InitVec4:
out.Write(inst);
break;
case IROp::VSin:
case IROp::VCos:
case IROp::VSqrt:
case IROp::VRSqrt:
case IROp::VRecip:
case IROp::VAsin:
out.Write(inst);
break;
case IROp::ZeroFpCond:
case IROp::FCmpUnordered:
case IROp::FCmpEqual:

View File

@ -45,6 +45,10 @@ inline float vfpu_cos(float angle) {
return cosf(angle);
}
// Arcsine with the result divided by pi/2, so an input of 1.0 yields about
// 1.0 instead of pi/2 radians.
inline float vfpu_asin(float angle) {
	// The division happens at the precision of M_PI_2 and is then narrowed to float.
	const double scaled = asinf(angle) / M_PI_2;
	return (float)scaled;
}
inline void vfpu_sincos(float angle, float &sine, float &cosine) {
angle -= floorf(angle * 0.25f) * 4.f;
angle *= (float)M_PI_2;
@ -127,7 +131,15 @@ int GetNumVectorElements(VectorSize sz);
int GetMatrixSide(MatrixSize sz);
const char *GetVectorNotation(int reg, VectorSize size);
const char *GetMatrixNotation(int reg, MatrixSize size);
// Reports whether bit 5 (0x20) of the matrix register number is set; this is
// the same bit that TransposeMatrixReg flips.
inline bool IsMatrixTransposed(int matrixReg) {
	const int transposeBit = 0x20;
	return (matrixReg & transposeBit) != 0;
}
// Reports whether bit 5 (0x20) of the vector register number is clear, which
// this code treats as marking a column vector.
inline bool IsVectorColumn(int vectorReg) {
	const int rowBit = 0x20;
	return (vectorReg & rowBit) == 0;
}
// Returns the matrix register number with bit 5 (0x20) toggled, the bit that
// IsMatrixTransposed tests. Applying it twice gives back the input.
inline int TransposeMatrixReg(int matrixReg) {
	const int transposeBit = 1 << 5;
	return matrixReg ^ transposeBit;
}
int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2);
float Float16ToFloat32(unsigned short l);

View File

@ -101,8 +101,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
for (int i = 0; i < n; i++)
origV[i] = vregs[i];
for (int i = 0; i < n; i++)
{
for (int i = 0; i < n; i++) {
int regnum = (prefix >> (i*2)) & 3;
int abs = (prefix >> (8+i)) & 1;
int negate = (prefix >> (16+i)) & 1;
@ -2142,7 +2141,7 @@ void CosOnly(SinCosArg angle) {
}
// Stores the scaled arcsine of angle in sincostemp[0].
// Uses the shared vfpu_asin helper; the earlier inline asinf(angle) / M_PI_2
// store was immediately overwritten by this one, so it has been dropped.
void ASinScaled(SinCosArg angle) {
	sincostemp[0] = vfpu_asin(angle);
}
void SinCosNegSin(SinCosArg angle) {