mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-17 12:47:46 +00:00
Merge pull request #11835 from unknownbrackets/vfpu-artdink
Correct vocp / vsocp prefix handling
This commit is contained in:
commit
f742b79174
@ -2301,8 +2301,16 @@ namespace MIPSComp
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
|
||||
// S prefix forces the negate flags.
|
||||
js.prefixS |= 0x000F0000;
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixT(tregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
MIPSReg tempregs[4];
|
||||
@ -2314,10 +2322,9 @@ namespace MIPSComp
|
||||
}
|
||||
}
|
||||
|
||||
MOVI2F(S0, 1.0f, SCRATCHREG1);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
|
||||
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
|
||||
VADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
@ -1433,12 +1433,21 @@ void ArmJit::CompNEON_Vocp(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
// TODO: Handle T prefix. Right now it uses 1.0f always.
|
||||
|
||||
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
|
||||
// S prefix forces the negate flags.
|
||||
js.prefixS |= 0x000F0000;
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
MappedRegs regs = NEONMapDirtyIn(op, sz, sz);
|
||||
MOVI2F_neon(Q0, 1.0f, R0);
|
||||
VSUB(F_32, regs.vd, Q0, regs.vs);
|
||||
VADD(F_32, regs.vd, Q0, regs.vs);
|
||||
NEONApplyPrefixD(regs.vd);
|
||||
|
||||
fpr.ReleaseSpillLocksAndDiscardTemps();
|
||||
|
@ -1952,8 +1952,16 @@ namespace MIPSComp {
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
|
||||
// S prefix forces the negate flags.
|
||||
js.prefixS |= 0x000F0000;
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixT(tregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
MIPSReg tempregs[4];
|
||||
@ -1967,8 +1975,8 @@ namespace MIPSComp {
|
||||
|
||||
fp.MOVI2F(S0, 1.0f, SCRATCH1);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
|
||||
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
|
||||
fp.FADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
@ -1846,22 +1846,30 @@ namespace MIPSComp {
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
|
||||
// S prefix forces the negate flags.
|
||||
js.prefixS |= 0x000F0000;
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
// There's no bits for t, so just reuse s. It'll be constants only.
|
||||
GetVectorRegsPrefixT(tregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
u8 tempregs[4];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!IsOverlapSafe(dregs[i], n, sregs)) {
|
||||
tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things
|
||||
tempregs[i] = IRVTEMP_0 + i;
|
||||
} else {
|
||||
tempregs[i] = dregs[i];
|
||||
}
|
||||
}
|
||||
|
||||
ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f));
|
||||
for (int i = 0; i < n; ++i) {
|
||||
ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]);
|
||||
ir.Write(IROp::FAdd, tempregs[i], tregs[i], sregs[i]);
|
||||
}
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (dregs[i] != tempregs[i]) {
|
||||
|
@ -544,16 +544,24 @@ namespace MIPSInt
|
||||
|
||||
void Int_Vocp(MIPSOpcode op)
|
||||
{
|
||||
float s[4], d[4];
|
||||
float s[4], t[4], d[4];
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
ReadVector(s, sz, vs);
|
||||
ApplySwizzleS(s, sz);
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++)
|
||||
{
|
||||
// Always positive NaN.
|
||||
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : 1.0f - s[i];
|
||||
|
||||
// S prefix forces the negate flags.
|
||||
u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
|
||||
ApplyPrefixST(s, sprefix | 0x000F0000, sz);
|
||||
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
|
||||
ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000055 | 0x0000F000, sz);
|
||||
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++) {
|
||||
// Always positive NaN. Note that s is always negated from the registers.
|
||||
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
|
||||
}
|
||||
ApplyPrefixD(d, sz);
|
||||
WriteVector(d, sz, vd);
|
||||
@ -563,22 +571,30 @@ namespace MIPSInt
|
||||
|
||||
void Int_Vsocp(MIPSOpcode op)
|
||||
{
|
||||
float s[4], d[4];
|
||||
float s[4], t[4], d[4];
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
VectorSize outSize = GetDoubleVectorSize(sz);
|
||||
ReadVector(s, sz, vs);
|
||||
ApplySwizzleS(s, sz);
|
||||
|
||||
// S prefix forces negate in even/odd and xxyy swizzle.
|
||||
// abs works, and applies to final position (not source.)
|
||||
u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
|
||||
ApplyPrefixST(s, (sprefix & ~0x000F00FF) | 0x00000050 | 0x00050000, outSize);
|
||||
|
||||
// T prefix forces constants on and regnum to 0, 1, 0, 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
|
||||
ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000011 | 0x0000F000, outSize);
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
float x = s[0];
|
||||
d[0] = nanclamp(1.0f - x, 0.0f, 1.0f);
|
||||
d[1] = nanclamp(x, 0.0f, 1.0f);
|
||||
VectorSize outSize = V_Pair;
|
||||
if (n > 1) {
|
||||
float y = s[1];
|
||||
d[2] = nanclamp(1.0f - y, 0.0f, 1.0f);
|
||||
d[3] = nanclamp(y, 0.0f, 1.0f);
|
||||
outSize = V_Quad;
|
||||
// Essentially D prefix saturation is forced.
|
||||
d[0] = nanclamp(t[0] + s[0], 0.0f, 1.0f);
|
||||
d[1] = nanclamp(t[1] + s[1], 0.0f, 1.0f);
|
||||
if (outSize == V_Quad) {
|
||||
d[2] = nanclamp(t[2] + s[2], 0.0f, 1.0f);
|
||||
d[3] = nanclamp(t[3] + s[3], 0.0f, 1.0f);
|
||||
}
|
||||
ApplyPrefixD(d, sz);
|
||||
WriteVector(d, outSize, vd);
|
||||
@ -1804,11 +1820,33 @@ bad:
|
||||
|
||||
void Int_Vlgb(MIPSOpcode op)
|
||||
{
|
||||
// S & D valid
|
||||
Reporting::ReportMessage("vlgb not implemented");
|
||||
if (!PSP_CoreParameter().headLess) {
|
||||
_dbg_assert_msg_(CPU,0,"vlgb not implemented");
|
||||
// Vector log binary (extract exponent)
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
|
||||
FloatBits d;
|
||||
FloatBits s;
|
||||
|
||||
ReadVector(s.f, sz, vs);
|
||||
// TODO: Test swizzle, t?
|
||||
ApplySwizzleS(s.f, sz);
|
||||
|
||||
if (sz != V_Single) {
|
||||
ERROR_LOG_REPORT(CPU, "vlgb not implemented for size %d", GetNumVectorElements(sz));
|
||||
}
|
||||
for (int i = 0; i < GetNumVectorElements(sz); ++i) {
|
||||
int exp = (s.u[i] & 0x7F800000) >> 23;
|
||||
if (exp == 0xFF) {
|
||||
d.f[i] = s.f[i];
|
||||
} else if (exp == 0) {
|
||||
d.f[i] = -INFINITY;
|
||||
} else {
|
||||
d.f[i] = (float)(exp - 127);
|
||||
}
|
||||
}
|
||||
ApplyPrefixD(d.f, sz);
|
||||
WriteVector(d.f, sz, vd);
|
||||
PC += 4;
|
||||
EatPrefixes();
|
||||
}
|
||||
@ -1889,10 +1927,31 @@ bad:
|
||||
|
||||
void Int_Vsbz(MIPSOpcode op)
|
||||
{
|
||||
Reporting::ReportMessage("vsbz not implemented");
|
||||
if (!PSP_CoreParameter().headLess) {
|
||||
_dbg_assert_msg_(CPU,0,"vsbz not implemented");
|
||||
// Vector scale by zero (set exp to 0 to extract mantissa)
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
|
||||
FloatBits d;
|
||||
FloatBits s;
|
||||
|
||||
ReadVector(s.f, sz, vs);
|
||||
// TODO: Test swizzle, t?
|
||||
ApplySwizzleS(s.f, sz);
|
||||
|
||||
if (sz != V_Single) {
|
||||
ERROR_LOG_REPORT(CPU, "vsbz not implemented for size %d", GetNumVectorElements(sz));
|
||||
}
|
||||
for (int i = 0; i < GetNumVectorElements(sz); ++i) {
|
||||
// NAN and denormals pass through.
|
||||
if (my_isnan(s.f[i]) || (s.u[i] & 0x7F800000) == 0) {
|
||||
d.u[i] = s.u[i];
|
||||
} else {
|
||||
d.u[i] = (127 << 23) | (s.u[i] & 0x007FFFFF);
|
||||
}
|
||||
}
|
||||
ApplyPrefixD(d.f, sz);
|
||||
WriteVector(d.f, sz, vd);
|
||||
PC += 4;
|
||||
EatPrefixes();
|
||||
}
|
||||
|
@ -524,8 +524,8 @@ const MIPSInstruction tableVFPU0[8] = // 011000 xxx ....... . ....... . .......
|
||||
{
|
||||
INSTR("vadd", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsub", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vsbn", JITFUNC(Comp_Generic), Dis_VectorSet3, Int_Vsbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
// TODO: Disasm is wrong.
|
||||
INSTR("vsbn", JITFUNC(Comp_Generic), Dis_VectorSet3, Int_Vsbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
|
||||
INSTR("vdiv", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
@ -577,20 +577,18 @@ const MIPSInstruction tableVFPU4Jump[32] = // 110100 xxxxx ..... . ....... . ...
|
||||
INVALID,
|
||||
INVALID,
|
||||
//24 - 110100 11 ........ . ....... . .......
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
};
|
||||
|
||||
const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
|
||||
{
|
||||
// TODO disasm
|
||||
INSTR("vrnds", JITFUNC(Comp_Generic), Dis_Vrnds, Int_Vrnds, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrndi", JITFUNC(Comp_Generic), Dis_VrndX, Int_VrndX, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrndf1", JITFUNC(Comp_Generic), Dis_VrndX, Int_VrndX, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
@ -599,9 +597,7 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
//8
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
// TODO: Flags may not be correct (prefixes, etc.) Is this the correct encoding? Others say 10110.
|
||||
INSTR("vsbz", JITFUNC(Comp_Generic), Dis_Generic, Int_Vsbz, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INVALID, INVALID, INVALID,
|
||||
INVALID, INVALID, INVALID, INVALID,
|
||||
//16
|
||||
INVALID,
|
||||
INVALID,
|
||||
@ -610,9 +606,8 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
|
||||
|
||||
INVALID,
|
||||
INVALID,
|
||||
INVALID,
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vlgb", JITFUNC(Comp_Generic), Dis_Generic, Int_Vlgb, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vsbz", JITFUNC(Comp_Generic), Dis_Generic, Int_Vsbz, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vlgb", JITFUNC(Comp_Generic), Dis_Generic, Int_Vlgb, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
//24
|
||||
INSTR("vuc2i", JITFUNC(Comp_Vx2i), Dis_Vs2i, Int_Vx2i, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX), // Seen in BraveStory, initialization 110100 00001110000 000 0001 0000 0000
|
||||
INSTR("vc2i", JITFUNC(Comp_Vx2i), Dis_Vs2i, Int_Vx2i, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
@ -731,23 +726,18 @@ const MIPSInstruction tableVFPUMatrixSet1[16] = // 111100 11100 .xxxx . .......
|
||||
|
||||
const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
|
||||
{
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vsrt1", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt1, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vsrt2", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt2, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vsrt1", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt1, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsrt2", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt2, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vbfy1", JITFUNC(Comp_Vbfy), Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vbfy2", JITFUNC(Comp_Vbfy), Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
//4
|
||||
INSTR("vocp", JITFUNC(Comp_Vocp), Dis_Vbfy, Int_Vocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX), // one's complement
|
||||
INSTR("vsocp", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vfad", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vfad, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vavg", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vavg, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vavg", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vavg, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
//8
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vsrt3", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vsrt4", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU),
|
||||
INSTR("vsrt3", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsrt4", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsgn", JITFUNC(Comp_Vsgn), Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
|
||||
INVALID,
|
||||
//12
|
||||
@ -757,9 +747,7 @@ const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
|
||||
INVALID,
|
||||
|
||||
//16
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vmfvc", JITFUNC(Comp_Vmfvc), Dis_Vmftvc, Int_Vmfvc, IN_OTHER|IN_VFPU_CC|OUT_OTHER|IS_VFPU),
|
||||
// TODO: Flags may not be correct (prefixes, etc.)
|
||||
INSTR("vmtvc", JITFUNC(Comp_Vmtvc), Dis_Vmftvc, Int_Vmtvc, IN_OTHER|OUT_VFPU_CC|OUT_OTHER|IS_VFPU),
|
||||
INVALID,
|
||||
INVALID,
|
||||
|
@ -2026,12 +2026,24 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
|
||||
// S prefix forces the negate flags.
|
||||
js.prefixS |= 0x000F0000;
|
||||
// T prefix forces constants on and regnum to 1.
|
||||
// That means negate still works, and abs activates a different constant.
|
||||
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
// Actually uses the T prefixes (despite being VS.)
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
if (js.prefixT != 0x0000F055)
|
||||
GetVectorRegsPrefixT(tregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
// Flush SIMD.
|
||||
fpr.SimpleRegsV(sregs, sz, 0);
|
||||
if (js.prefixT != 0x0000F055)
|
||||
fpr.SimpleRegsV(tregs, sz, 0);
|
||||
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
|
||||
|
||||
X64Reg tempxregs[4];
|
||||
@ -2048,11 +2060,17 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
|
||||
}
|
||||
}
|
||||
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||
MOVSS(XMM1, MatR(TEMPREG));
|
||||
if (js.prefixT == 0x0000F055) {
|
||||
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||
MOVSS(XMM1, MatR(TEMPREG));
|
||||
}
|
||||
for (int i = 0; i < n; ++i) {
|
||||
MOVSS(XMM0, R(XMM1));
|
||||
SUBSS(XMM0, fpr.V(sregs[i]));
|
||||
if (js.prefixT == 0x0000F055) {
|
||||
MOVSS(XMM0, R(XMM1));
|
||||
} else {
|
||||
MOVSS(XMM0, fpr.V(tregs[i]));
|
||||
}
|
||||
ADDSS(XMM0, fpr.V(sregs[i]));
|
||||
MOVSS(tempxregs[i], R(XMM0));
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user