Merge pull request #11835 from unknownbrackets/vfpu-artdink

Correct vocp / vsocp prefix handling
This commit is contained in:
Henrik Rydgård 2019-02-24 09:29:32 +01:00 committed by GitHub
commit f742b79174
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 168 additions and 71 deletions

View File

@ -2301,8 +2301,16 @@ namespace MIPSComp
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
MIPSReg tempregs[4];
@ -2314,10 +2322,9 @@ namespace MIPSComp
}
}
MOVI2F(S0, 1.0f, SCRATCHREG1);
for (int i = 0; i < n; ++i) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
VADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
}
for (int i = 0; i < n; ++i) {

View File

@ -1433,12 +1433,21 @@ void ArmJit::CompNEON_Vocp(MIPSOpcode op) {
DISABLE;
}
// TODO: Handle T prefix. Right now it uses 1.0f always.
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
MappedRegs regs = NEONMapDirtyIn(op, sz, sz);
MOVI2F_neon(Q0, 1.0f, R0);
VSUB(F_32, regs.vd, Q0, regs.vs);
VADD(F_32, regs.vd, Q0, regs.vs);
NEONApplyPrefixD(regs.vd);
fpr.ReleaseSpillLocksAndDiscardTemps();

View File

@ -1952,8 +1952,16 @@ namespace MIPSComp {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
MIPSReg tempregs[4];
@ -1967,8 +1975,8 @@ namespace MIPSComp {
fp.MOVI2F(S0, 1.0f, SCRATCH1);
for (int i = 0; i < n; ++i) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
fp.FADD(fpr.V(tempregs[i]), fpr.V(tregs[i]), fpr.V(sregs[i]));
}
for (int i = 0; i < n; ++i) {

View File

@ -1846,22 +1846,30 @@ namespace MIPSComp {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
// There are no bits for t, so just reuse s. It'll be constants only.
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
u8 tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], n, sregs)) {
tempregs[i] = IRVTEMP_PFX_T + i; // using IRTEMP0 for other things
tempregs[i] = IRVTEMP_0 + i;
} else {
tempregs[i] = dregs[i];
}
}
ir.Write(IROp::SetConstF, IRVTEMP_0, ir.AddConstantFloat(1.0f));
for (int i = 0; i < n; ++i) {
ir.Write(IROp::FSub, tempregs[i], IRVTEMP_0, sregs[i]);
ir.Write(IROp::FAdd, tempregs[i], tregs[i], sregs[i]);
}
for (int i = 0; i < n; ++i) {
if (dregs[i] != tempregs[i]) {

View File

@ -544,16 +544,24 @@ namespace MIPSInt
void Int_Vocp(MIPSOpcode op)
{
float s[4], d[4];
float s[4], t[4], d[4];
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
ReadVector(s, sz, vs);
ApplySwizzleS(s, sz);
for (int i = 0; i < GetNumVectorElements(sz); i++)
{
// Always positive NaN.
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : 1.0f - s[i];
// S prefix forces the negate flags.
u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
ApplyPrefixST(s, sprefix | 0x000F0000, sz);
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000055 | 0x0000F000, sz);
for (int i = 0; i < GetNumVectorElements(sz); i++) {
// Always positive NaN. Note that s is always negated from the registers.
d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
}
ApplyPrefixD(d, sz);
WriteVector(d, sz, vd);
@ -563,22 +571,30 @@ namespace MIPSInt
void Int_Vsocp(MIPSOpcode op)
{
float s[4], d[4];
float s[4], t[4], d[4];
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
VectorSize outSize = GetDoubleVectorSize(sz);
ReadVector(s, sz, vs);
ApplySwizzleS(s, sz);
// S prefix forces negate in even/odd and xxyy swizzle.
// abs works, and applies to the final position (not the source).
u32 sprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
ApplyPrefixST(s, (sprefix & ~0x000F00FF) | 0x00000050 | 0x00050000, outSize);
// T prefix forces constants on and regnum to 0, 1, 0, 1.
// That means negate still works, and abs activates a different constant.
u32 tprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
ApplyPrefixST(t, (tprefix & ~0x000000FF) | 0x00000011 | 0x0000F000, outSize);
int n = GetNumVectorElements(sz);
float x = s[0];
d[0] = nanclamp(1.0f - x, 0.0f, 1.0f);
d[1] = nanclamp(x, 0.0f, 1.0f);
VectorSize outSize = V_Pair;
if (n > 1) {
float y = s[1];
d[2] = nanclamp(1.0f - y, 0.0f, 1.0f);
d[3] = nanclamp(y, 0.0f, 1.0f);
outSize = V_Quad;
// Essentially D prefix saturation is forced.
d[0] = nanclamp(t[0] + s[0], 0.0f, 1.0f);
d[1] = nanclamp(t[1] + s[1], 0.0f, 1.0f);
if (outSize == V_Quad) {
d[2] = nanclamp(t[2] + s[2], 0.0f, 1.0f);
d[3] = nanclamp(t[3] + s[3], 0.0f, 1.0f);
}
ApplyPrefixD(d, sz);
WriteVector(d, outSize, vd);
@ -1804,11 +1820,33 @@ bad:
void Int_Vlgb(MIPSOpcode op)
{
// S & D valid
Reporting::ReportMessage("vlgb not implemented");
if (!PSP_CoreParameter().headLess) {
_dbg_assert_msg_(CPU,0,"vlgb not implemented");
// Vector log binary (extract exponent)
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
FloatBits d;
FloatBits s;
ReadVector(s.f, sz, vs);
// TODO: Test swizzle, t?
ApplySwizzleS(s.f, sz);
if (sz != V_Single) {
ERROR_LOG_REPORT(CPU, "vlgb not implemented for size %d", GetNumVectorElements(sz));
}
for (int i = 0; i < GetNumVectorElements(sz); ++i) {
int exp = (s.u[i] & 0x7F800000) >> 23;
if (exp == 0xFF) {
d.f[i] = s.f[i];
} else if (exp == 0) {
d.f[i] = -INFINITY;
} else {
d.f[i] = (float)(exp - 127);
}
}
ApplyPrefixD(d.f, sz);
WriteVector(d.f, sz, vd);
PC += 4;
EatPrefixes();
}
@ -1889,10 +1927,31 @@ bad:
void Int_Vsbz(MIPSOpcode op)
{
Reporting::ReportMessage("vsbz not implemented");
if (!PSP_CoreParameter().headLess) {
_dbg_assert_msg_(CPU,0,"vsbz not implemented");
// Vector scale by zero (set exp to 0 to extract mantissa)
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
FloatBits d;
FloatBits s;
ReadVector(s.f, sz, vs);
// TODO: Test swizzle, t?
ApplySwizzleS(s.f, sz);
if (sz != V_Single) {
ERROR_LOG_REPORT(CPU, "vsbz not implemented for size %d", GetNumVectorElements(sz));
}
for (int i = 0; i < GetNumVectorElements(sz); ++i) {
// NAN and denormals pass through.
if (my_isnan(s.f[i]) || (s.u[i] & 0x7F800000) == 0) {
d.u[i] = s.u[i];
} else {
d.u[i] = (127 << 23) | (s.u[i] & 0x007FFFFF);
}
}
ApplyPrefixD(d.f, sz);
WriteVector(d.f, sz, vd);
PC += 4;
EatPrefixes();
}

View File

@ -524,8 +524,8 @@ const MIPSInstruction tableVFPU0[8] = // 011000 xxx ....... . ....... . .......
{
INSTR("vadd", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsub", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsbn", JITFUNC(Comp_Generic), Dis_VectorSet3, Int_Vsbn, IN_OTHER|OUT_OTHER|IS_VFPU),
// TODO: Disasm is wrong.
INSTR("vsbn", JITFUNC(Comp_Generic), Dis_VectorSet3, Int_Vsbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INVALID, INVALID, INVALID, INVALID,
INSTR("vdiv", JITFUNC(Comp_VecDo3), Dis_VectorSet3, Int_VecDo3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
@ -577,20 +577,18 @@ const MIPSInstruction tableVFPU4Jump[32] = // 110100 xxxxx ..... . ....... . ...
INVALID,
INVALID,
//24 - 110100 11 ........ . ....... . .......
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vwbn.s", JITFUNC(Comp_Generic), Dis_Vwbn, Int_Vwbn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
};
const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
{
// TODO disasm
INSTR("vrnds", JITFUNC(Comp_Generic), Dis_Vrnds, Int_Vrnds, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrndi", JITFUNC(Comp_Generic), Dis_VrndX, Int_VrndX, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrndf1", JITFUNC(Comp_Generic), Dis_VrndX, Int_VrndX, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
@ -599,9 +597,7 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
INVALID, INVALID, INVALID, INVALID,
//8
INVALID, INVALID, INVALID, INVALID,
// TODO: Flags may not be correct (prefixes, etc.) Is this the correct encoding? Others say 10110.
INSTR("vsbz", JITFUNC(Comp_Generic), Dis_Generic, Int_Vsbz, IN_OTHER|OUT_OTHER|IS_VFPU),
INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
//16
INVALID,
INVALID,
@ -610,9 +606,8 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00001 xxxxx . ....... . .......
INVALID,
INVALID,
INVALID,
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vlgb", JITFUNC(Comp_Generic), Dis_Generic, Int_Vlgb, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vsbz", JITFUNC(Comp_Generic), Dis_Generic, Int_Vsbz, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vlgb", JITFUNC(Comp_Generic), Dis_Generic, Int_Vlgb, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
//24
INSTR("vuc2i", JITFUNC(Comp_Vx2i), Dis_Vs2i, Int_Vx2i, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX), // Seen in BraveStory, initialization 110100 00001110000 000 0001 0000 0000
INSTR("vc2i", JITFUNC(Comp_Vx2i), Dis_Vs2i, Int_Vx2i, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
@ -731,23 +726,18 @@ const MIPSInstruction tableVFPUMatrixSet1[16] = // 111100 11100 .xxxx . .......
const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
{
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt1", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt1, IN_OTHER|OUT_OTHER|IS_VFPU),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt2", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt2, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vsrt1", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt1, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsrt2", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt2, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vbfy1", JITFUNC(Comp_Vbfy), Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vbfy2", JITFUNC(Comp_Vbfy), Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
//4
INSTR("vocp", JITFUNC(Comp_Vocp), Dis_Vbfy, Int_Vocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX), // one's complement
INSTR("vsocp", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vfad", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vfad, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vavg", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vavg, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vavg", JITFUNC(Comp_Vhoriz), Dis_Vfad, Int_Vavg, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
//8
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt3", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt4", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vsrt3", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsrt4", JITFUNC(Comp_Generic), Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsgn", JITFUNC(Comp_Vsgn), Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INVALID,
//12
@ -757,9 +747,7 @@ const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
INVALID,
//16
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vmfvc", JITFUNC(Comp_Vmfvc), Dis_Vmftvc, Int_Vmfvc, IN_OTHER|IN_VFPU_CC|OUT_OTHER|IS_VFPU),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vmtvc", JITFUNC(Comp_Vmtvc), Dis_Vmftvc, Int_Vmtvc, IN_OTHER|OUT_VFPU_CC|OUT_OTHER|IS_VFPU),
INVALID,
INVALID,

View File

@ -2026,12 +2026,24 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
// This is a hack that modifies prefixes. We eat them later, so just overwrite.
// S prefix forces the negate flags.
js.prefixS |= 0x000F0000;
// T prefix forces constants on and regnum to 1.
// That means negate still works, and abs activates a different constant.
js.prefixT = (js.prefixT & ~0x000000FF) | 0x00000055 | 0x0000F000;
u8 sregs[4], tregs[4], dregs[4];
// Actually uses the T prefixes (despite being VS.)
GetVectorRegsPrefixS(sregs, sz, _VS);
if (js.prefixT != 0x0000F055)
GetVectorRegsPrefixT(tregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
// Flush SIMD.
fpr.SimpleRegsV(sregs, sz, 0);
if (js.prefixT != 0x0000F055)
fpr.SimpleRegsV(tregs, sz, 0);
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
X64Reg tempxregs[4];
@ -2048,11 +2060,17 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
}
}
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
MOVSS(XMM1, MatR(TEMPREG));
if (js.prefixT == 0x0000F055) {
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
MOVSS(XMM1, MatR(TEMPREG));
}
for (int i = 0; i < n; ++i) {
MOVSS(XMM0, R(XMM1));
SUBSS(XMM0, fpr.V(sregs[i]));
if (js.prefixT == 0x0000F055) {
MOVSS(XMM0, R(XMM1));
} else {
MOVSS(XMM0, fpr.V(tregs[i]));
}
ADDSS(XMM0, fpr.V(sregs[i]));
MOVSS(tempxregs[i], R(XMM0));
}