mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 08:39:51 +00:00
IR: Some more VFPU
This commit is contained in:
parent
558bb197c7
commit
45efcda6b1
@ -16,6 +16,7 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "math/math_util.h"
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
@ -57,7 +58,9 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
// Returns true when the four register indices form an ascending run in which
// every entry is exactly one greater than the entry before it.
// The comparison happens after integer promotion, so a u8 wrap-around
// (e.g. 255 followed by 0) is never treated as consecutive.
static bool IsConsecutive4(const u8 regs[4]) {
	for (int i = 1; i < 4; i++) {
		if (regs[i] != regs[i - 1] + 1)
			return false;
	}
	return true;
}
|
||||
|
||||
void IRFrontend::Comp_VPFX(MIPSOpcode op) {
|
||||
@ -244,15 +247,79 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
// Emits IR for a vector-init op that fills the destination with a constant.
// Bits 16..19 of the opcode select the constant: 6 means all zeros, any other
// value is treated as all ones.
// Handled here: an unprefixed quad stored as a column (one InitVec4 write) and
// an unprefixed single (one SetConstV write). Every other shape bails out
// through the DISABLE macro (defined outside this view).
void IRFrontend::Comp_VVectorInit(MIPSOpcode op) {
	if (!js.HasNoPrefix())
		DISABLE;

	const VectorSize sz = GetVecSize(op);
	const bool wantZero = ((op >> 16) & 0xF) == 6;
	const int vd = _VD;

	// NOTE(review): V_Quad / V_Single replace the bare literals 4 / 1 that were
	// compared against the VectorSize here - confirm the enum values match.
	if (sz == V_Quad && IsVectorColumn(vd)) {
		u8 dregs[4];
		GetVectorRegs(dregs, sz, vd);
		// Only the first register is passed; InitVec4 writes four floats from there.
		ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)(wantZero ? Vec4Init::AllZERO : Vec4Init::AllONE));
	} else if (sz == V_Single) {
		ir.Write(IROp::SetConstV, voffset[vd], ir.AddConstantFloat(wantZero ? 0.0f : 1.0f));
	} else {
		DISABLE;
	}
}
|
||||
|
||||
// Emits IR for a quad "identity row" init: the destination receives 1.0 in one
// lane and 0.0 in the other three.
// Only an unprefixed quad stored as a column is handled, because that maps onto
// a single InitVec4 write; anything else bails out through the DISABLE macro
// (defined outside this view).
void IRFrontend::Comp_VIdt(MIPSOpcode op) {
	if (!js.HasNoPrefix())
		DISABLE;

	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	if (sz != V_Quad || !IsVectorColumn(vd))
		DISABLE;

	u8 dregs[4];
	GetVectorRegs(dregs, sz, vd);

	// The low two bits of vd choose the lane that gets the 1.0. This relies on
	// Set_1000..Set_0001 being consecutive enumerators of Vec4Init.
	const int lane = vd & 3;
	const Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + lane);
	ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)init);
}
|
||||
|
||||
// Emits IR that fills a 4x4 matrix with a constant pattern, one InitVec4 write
// per column. Bits 16..19 of the opcode select the pattern:
//   3 - identity (column i gets 1.0 in lane i)
//   6 - all zeros
//   7 - all ones
// Other matrix sizes and unrecognised pattern codes bail out through the
// DISABLE macro (defined outside this view) before any IR is written, instead
// of returning without emitting anything.
void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
	MatrixSize sz = GetMtxSize(op);
	if (sz != M_4x4) {
		DISABLE;
	}

	// The pattern code does not depend on the column, so decode it once.
	const int type = (op >> 16) & 0xF;
	if (type != 3 && type != 6 && type != 7) {
		DISABLE;
	}

	VectorSize vsz = GetVectorSize(sz);
	int vd = _VD;
	if (IsMatrixTransposed(vd)) {
		// All outputs are transpositionally symmetric, so should be fine.
		vd = TransposeMatrixReg(vd);
	}

	u8 vecs[4];
	GetMatrixColumns(vd, M_4x4, vecs);
	for (int i = 0; i < 4; i++) {
		u8 vec[4];
		GetVectorRegs(vec, vsz, vecs[i]);
		// As they are columns, they will be nicely consecutive.
		Vec4Init init;
		if (type == 3) {
			init = Vec4Init((int)Vec4Init::Set_1000 + i);
		} else if (type == 6) {
			init = Vec4Init::AllZERO;
		} else {
			init = Vec4Init::AllONE;
		}
		ir.Write(IROp::InitVec4, voffset[vec[0]], (int)init);
	}
}
|
||||
|
||||
void IRFrontend::Comp_VHdp(MIPSOpcode op) {
|
||||
@ -275,7 +342,7 @@ namespace MIPSComp {
|
||||
|
||||
void IRFrontend::Comp_VV2Op(MIPSOpcode op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
// Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure
|
||||
// Eliminate silly no-op VMOVs, common in Wipeout Pure
|
||||
if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) {
|
||||
return;
|
||||
}
|
||||
@ -379,7 +446,12 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
// Emits IR that loads a 16-bit signed integer immediate, converted to float,
// into the single VFPU register named by the VT field.
// Prefixed forms bail out through the DISABLE macro (defined outside this view).
void IRFrontend::Comp_Viim(MIPSOpcode op) {
	if (!js.HasNoPrefix())
		DISABLE;

	// The low 16 bits of the opcode hold the immediate; sign-extend it to 32 bits.
	const s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
	const u8 dreg = _VT;
	ir.Write(IROp::SetConstV, voffset[dreg], ir.AddConstantFloat((float)imm));
}
|
||||
|
||||
void IRFrontend::Comp_Vfim(MIPSOpcode op) {
|
||||
|
@ -85,9 +85,18 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
|
||||
{ IROp::VMovFromGPR, "VMovFromGPR", "VG" },
|
||||
{ IROp::VMovToGPR, "VMovToGPR", "GV" },
|
||||
{ IROp::InitVec4, "InitVec4", "Vv"},
|
||||
{ IROp::FpCondToReg, "FpCondToReg", "G" },
|
||||
{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
|
||||
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
|
||||
|
||||
{ IROp::VSin, "VSin", "VV" },
|
||||
{ IROp::VCos, "VCos", "VV" },
|
||||
{ IROp::VSqrt, "VSqrt", "VV" },
|
||||
{ IROp::VRSqrt, "VRSqrt", "VV" },
|
||||
{ IROp::VRecip, "VRecip", "VV" },
|
||||
{ IROp::VAsin, "VAsin", "VV" },
|
||||
|
||||
{ IROp::Interpret, "Interpret", "_C" },
|
||||
{ IROp::Downcount, "Downcount", "_II" },
|
||||
{ IROp::ExitToConst, "Exit", "C" },
|
||||
@ -177,6 +186,15 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
|
||||
"RCX6",
|
||||
"RCX7",
|
||||
};
|
||||
// Printable forms of the Vec4Init constants, in the same order as the enum.
// The table has eight slots but the enum only has seven values; the last slot
// carries a placeholder so that formatting it with "%s" never sees a null pointer.
static const char *initVec4Names[8] = {
	"[0 0 0 0]",
	"[1 1 1 1]",
	"[-1 -1 -1 -1]",
	"[1 0 0 0]",
	"[0 1 0 0]",
	"[0 0 1 0]",
	"[0 0 0 1]",
	"[invalid]",
};
|
||||
|
||||
switch (type) {
|
||||
case 'G':
|
||||
@ -197,6 +215,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
|
||||
case 'T':
|
||||
snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]);
|
||||
break;
|
||||
case 'v':
|
||||
snprintf(buf, bufSize, "%s", initVec4Names[param]);
|
||||
break;
|
||||
case '_':
|
||||
case '\0':
|
||||
buf[0] = 0;
|
||||
|
@ -144,6 +144,16 @@ enum class IROp : u8 {
|
||||
VMovFromGPR,
|
||||
VMovToGPR,
|
||||
|
||||
InitVec4,
|
||||
|
||||
// Slow special functions. Used on singles.
|
||||
VSin,
|
||||
VCos,
|
||||
VSqrt,
|
||||
VRSqrt,
|
||||
VRecip,
|
||||
VAsin,
|
||||
|
||||
// Fake/System instructions
|
||||
Interpret,
|
||||
|
||||
@ -181,6 +191,17 @@ enum IRComparison {
|
||||
Bad,
|
||||
};
|
||||
|
||||
// Preset four-float constants that an InitVec4 instruction can write.
// The numeric values are used as indices into lookup tables and in arithmetic
// (Set_1000 + n selects the vector with 1.0 in lane n), so the order and
// the contiguity of Set_1000..Set_0001 must be kept.
enum class Vec4Init {
	AllZERO = 0,      // [ 0  0  0  0]
	AllONE = 1,       // [ 1  1  1  1]
	AllMinusONE = 2,  // [-1 -1 -1 -1]
	Set_1000 = 3,     // [ 1  0  0  0]
	Set_0100 = 4,     // [ 0  1  0  0]
	Set_0010 = 5,     // [ 0  0  1  0]
	Set_0001 = 6,     // [ 0  0  0  1]
};
|
||||
|
||||
// Hm, unused
|
||||
inline IRComparison Invert(IRComparison comp) {
|
||||
switch (comp) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "Core/HLE/HLE.h"
|
||||
#include "Core/HLE/ReplaceTables.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
#include "Core/MIPS/MIPSVFPUUtils.h"
|
||||
|
||||
#include "math/math_util.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
@ -14,6 +15,16 @@
|
||||
#include "Core/MIPS/IR/IRInst.h"
|
||||
#include "Core/MIPS/IR/IRInterpreter.h"
|
||||
|
||||
// Four-float constants written by the InitVec4 instruction; the row index is the
// instruction's src1 operand and the rows follow the Vec4Init enum order.
// 16-byte aligned so a row can be fetched with an aligned SSE load.
alignas(16) float vec4InitValues[8][4] = {
	{  0.0f,  0.0f,  0.0f,  0.0f },  // AllZERO
	{  1.0f,  1.0f,  1.0f,  1.0f },  // AllONE
	{ -1.0f, -1.0f, -1.0f, -1.0f },  // AllMinusONE
	{  1.0f,  0.0f,  0.0f,  0.0f },  // Set_1000
	{  0.0f,  1.0f,  0.0f,  0.0f },  // Set_0100
	{  0.0f,  0.0f,  1.0f,  0.0f },  // Set_0010
	{  0.0f,  0.0f,  0.0f,  1.0f },  // Set_0001
	{  0.0f,  0.0f,  0.0f,  0.0f },  // unused padding row (zero, as implicit initialisation gave before)
};
|
||||
|
||||
u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) {
|
||||
const IRInst *end = inst + count;
|
||||
while (inst != end) {
|
||||
@ -134,6 +145,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
||||
break;
|
||||
}
|
||||
|
||||
case IROp::InitVec4:
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->v[inst->dest], _mm_load_ps(vec4InitValues[inst->src1]));
|
||||
#else
|
||||
memcpy(&mips->v[inst->dest + i], vec4InitValues[inst->src1], 4 * sizeof(float));
|
||||
#endif
|
||||
break;
|
||||
|
||||
case IROp::VSin:
|
||||
mips->v[inst->dest] = vfpu_sin(mips->v[inst->src1]);
|
||||
break;
|
||||
case IROp::VCos:
|
||||
mips->v[inst->dest] = vfpu_cos(mips->v[inst->src1]);
|
||||
break;
|
||||
case IROp::VSqrt:
|
||||
mips->v[inst->dest] = sqrtf(mips->v[inst->src1]);
|
||||
break;
|
||||
case IROp::VRSqrt:
|
||||
mips->v[inst->dest] = 1.0f / sqrtf(mips->v[inst->src1]);
|
||||
break;
|
||||
case IROp::VRecip:
|
||||
mips->v[inst->dest] = 1.0f / mips->v[inst->src1];
|
||||
break;
|
||||
case IROp::VAsin:
|
||||
mips->v[inst->dest] = vfpu_asin(mips->v[inst->src1]);
|
||||
break;
|
||||
|
||||
case IROp::ShlImm:
|
||||
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
|
||||
break;
|
||||
|
@ -114,9 +114,9 @@ void IRJit::RunLoopUntil(u64 globalticks) {
|
||||
}
|
||||
while (mips_->downcount >= 0) {
|
||||
u32 inst = Memory::ReadUnchecked_U32(mips_->pc);
|
||||
u32 opcode = inst >> 24;
|
||||
u32 data = inst & 0xFFFFFF;
|
||||
if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) {
|
||||
u32 opcode = inst & 0xFF000000;
|
||||
if (opcode == MIPS_EMUHACK_OPCODE) {
|
||||
u32 data = inst & 0xFFFFFF;
|
||||
IRBlock *block = blocks_.GetBlock(data);
|
||||
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions());
|
||||
} else {
|
||||
|
@ -340,8 +340,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
|
||||
// FP-only instructions don't need to flush immediates.
|
||||
case IROp::FAdd:
|
||||
case IROp::FMul:
|
||||
case IROp::FDiv:
|
||||
// Regularize, to help x86 backends (add.s r0, r1, r0 -> add.s r0, r0, r1)
|
||||
if (inst.src2 == inst.dest && inst.src1 != inst.src2)
|
||||
std::swap(inst.src1, inst.src2);
|
||||
out.Write(inst);
|
||||
break;
|
||||
case IROp::FSub:
|
||||
case IROp::FDiv:
|
||||
case IROp::FNeg:
|
||||
case IROp::FAbs:
|
||||
case IROp::FSqrt:
|
||||
@ -373,6 +378,19 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::InitVec4:
|
||||
out.Write(inst);
|
||||
break;
|
||||
|
||||
case IROp::VSin:
|
||||
case IROp::VCos:
|
||||
case IROp::VSqrt:
|
||||
case IROp::VRSqrt:
|
||||
case IROp::VRecip:
|
||||
case IROp::VAsin:
|
||||
out.Write(inst);
|
||||
break;
|
||||
|
||||
case IROp::ZeroFpCond:
|
||||
case IROp::FCmpUnordered:
|
||||
case IROp::FCmpEqual:
|
||||
|
@ -45,6 +45,10 @@ inline float vfpu_cos(float angle) {
|
||||
return cosf(angle);
|
||||
}
|
||||
|
||||
// Arcsine scaled by 1 / (pi/2): asinf() yields radians in [-pi/2, pi/2], so the
// result lies in [-1, 1]. The division is carried out against the double
// constant M_PI_2 and narrowed back to float on return.
inline float vfpu_asin(float angle) {
	const float radians = asinf(angle);
	return radians / M_PI_2;
}
|
||||
|
||||
inline void vfpu_sincos(float angle, float &sine, float &cosine) {
|
||||
angle -= floorf(angle * 0.25f) * 4.f;
|
||||
angle *= (float)M_PI_2;
|
||||
@ -127,7 +131,15 @@ int GetNumVectorElements(VectorSize sz);
|
||||
int GetMatrixSide(MatrixSize sz);
|
||||
const char *GetVectorNotation(int reg, VectorSize size);
|
||||
const char *GetMatrixNotation(int reg, MatrixSize size);
|
||||
|
||||
// True when bit 5 of the matrix register number is set.
inline bool IsMatrixTransposed(int matrixReg) {
	return (matrixReg & 0x20) != 0;
}
|
||||
// True when bit 5 of the vector register number is clear.
inline bool IsVectorColumn(int vectorReg) {
	return (vectorReg & 0x20) == 0;
}
|
||||
// Flips bit 5 of the matrix register number - the same bit IsMatrixTransposed
// tests - and leaves all other bits alone. Applying it twice restores the input.
inline int TransposeMatrixReg(int matrixReg) {
	const int transposeBit = 1 << 5;
	return matrixReg ^ transposeBit;
}
|
||||
int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2);
|
||||
|
||||
float Float16ToFloat32(unsigned short l);
|
||||
|
@ -101,8 +101,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
|
||||
for (int i = 0; i < n; i++)
|
||||
origV[i] = vregs[i];
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
int regnum = (prefix >> (i*2)) & 3;
|
||||
int abs = (prefix >> (8+i)) & 1;
|
||||
int negate = (prefix >> (16+i)) & 1;
|
||||
@ -2142,7 +2141,7 @@ void CosOnly(SinCosArg angle) {
|
||||
}
|
||||
|
||||
// Stores the arcsine of the argument, divided by pi/2, in sincostemp[0].
// The computation is delegated to vfpu_asin so the formula lives in one place.
void ASinScaled(SinCosArg angle) {
	sincostemp[0] = vfpu_asin(angle);
}
|
||||
|
||||
void SinCosNegSin(SinCosArg angle) {
|
||||
|
Loading…
Reference in New Issue
Block a user