mirror of
https://github.com/ptitSeb/box64.git
synced 2025-02-17 04:49:17 +00:00
[RV64_DYNAREC] Added more opcodes for vector (#1981)
This commit is contained in:
parent
74acad3628
commit
adb423d96b
@ -120,6 +120,29 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
|
||||
VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
|
||||
}
|
||||
break;
|
||||
case 0x13:
|
||||
INST_NAME("MOVLPS Ex, Gx");
|
||||
nextop = F8;
|
||||
GETG;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
|
||||
v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, VECTOR_SEW64);
|
||||
if (MODREG) {
|
||||
ed = (nextop & 7) + (rex.b << 3);
|
||||
d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64);
|
||||
if (rv64_xtheadvector) {
|
||||
VECTOR_LOAD_VMASK(0b01, x4, 1);
|
||||
VMERGE_VVM(v0, v0, v1); // implies VMASK
|
||||
} else {
|
||||
VMV_X_S(x4, v1);
|
||||
VMV_S_X(v0, x4);
|
||||
}
|
||||
} else {
|
||||
VMV_X_S(x4, v0);
|
||||
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
|
||||
SD(x4, ed, fixedaddress);
|
||||
SMWRITE2();
|
||||
}
|
||||
break;
|
||||
case 0x14:
|
||||
INST_NAME("UNPCKLPS Gx, Ex");
|
||||
nextop = F8;
|
||||
@ -251,6 +274,21 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
|
||||
SMWRITE2();
|
||||
}
|
||||
break;
|
||||
case 0x2B:
|
||||
INST_NAME("MOVNTPS Ex, Gx");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
|
||||
GETGX_vector(v0, 0, dyn->vector_eew);
|
||||
if (MODREG) {
|
||||
ed = (nextop & 7) + (rex.b << 3);
|
||||
v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
|
||||
VMV_V_V(v1, v0);
|
||||
} else {
|
||||
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
|
||||
VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
|
||||
SMWRITE2();
|
||||
}
|
||||
break;
|
||||
case 0x50:
|
||||
INST_NAME("MOVMSKPS Gd, Ex");
|
||||
nextop = F8;
|
||||
@ -286,6 +324,18 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
|
||||
GETGX_empty_vector(v1);
|
||||
VFSQRT_V(v1, v0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x52:
|
||||
if (!box64_dynarec_fastround) return 0;
|
||||
INST_NAME("RSQRTPS Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
|
||||
GETGX_empty_vector(v1);
|
||||
LUI(x4, 0x3f800);
|
||||
FMVWX(v0, x4); // 1.0f
|
||||
VFSQRT_V(v1, v0, VECTOR_UNMASKED);
|
||||
VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x53:
|
||||
INST_NAME("RCPPS Gx, Ex");
|
||||
nextop = F8;
|
||||
@ -293,9 +343,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
|
||||
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
|
||||
GETGX_empty_vector(v1);
|
||||
LUI(x4, 0x3f800);
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
FMVWX(d0, x4); // 1.0f
|
||||
VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
|
||||
FMVWX(v0, x4); // 1.0f
|
||||
VFRDIV_VF(v1, v0, v0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x54:
|
||||
INST_NAME("ANDPS Gx, Ex");
|
||||
@ -502,6 +551,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
|
||||
case 0x40 ... 0x4F:
|
||||
case 0x60 ... 0x7F:
|
||||
case 0x80 ... 0xBF:
|
||||
case 0xC3 ... 0xC5:
|
||||
case 0xC8 ... 0xCF:
|
||||
return 0;
|
||||
default:
|
||||
|
@ -897,18 +897,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
}
|
||||
break;
|
||||
case 0x5B:
|
||||
if (!box64_dynarec_fastround) return 0;
|
||||
INST_NAME("CVTPS2DQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
GETEX_vector(v1, 0, 0, VECTOR_SEW32);
|
||||
GETGX_empty_vector(v0);
|
||||
if (box64_dynarec_fastround) {
|
||||
u8 = sse_setround(dyn, ninst, x6, x4);
|
||||
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
x87_restoreround(dyn, ninst, u8);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
u8 = sse_setround(dyn, ninst, x6, x4);
|
||||
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
x87_restoreround(dyn, ninst, u8);
|
||||
break;
|
||||
case 0x5C:
|
||||
INST_NAME("SUBPD Gx, Ex");
|
||||
|
@ -36,6 +36,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
int q0, q1;
|
||||
int d0, d1;
|
||||
int s0, s1;
|
||||
uint64_t tmp64u0, tmp64u1;
|
||||
int64_t fixedaddress, gdoffset;
|
||||
int unscaled;
|
||||
|
||||
@ -466,6 +467,29 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 0x70:
|
||||
INST_NAME("PSHUFLW Gx, Ex, Ib");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
GETEX_vector(v1, 0, 1, VECTOR_SEW16);
|
||||
GETGX_vector(v0, 1, VECTOR_SEW16);
|
||||
u8 = F8;
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
d1 = fpu_get_scratch(dyn);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
|
||||
tmp64u0 = 0x0007000600050004ULL;
|
||||
MOV64x(x5, tmp64u0);
|
||||
VMV_S_X(d1, x5);
|
||||
tmp64u0 = ((((uint64_t)u8 >> 6) & 3) << 48) | ((((uint64_t)u8 >> 4) & 3) << 32) | (((u8 >> 2) & 3) << 16) | (u8 & 3);
|
||||
MOV64x(x5, tmp64u0);
|
||||
VSLIDE1UP_VX(d0, d1, x5, VECTOR_UNMASKED);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
|
||||
if (v0 == v1) {
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
VMV_V_V(v1, v0);
|
||||
}
|
||||
VRGATHER_VV(v0, v1, d0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0xC2:
|
||||
INST_NAME("CMPSD Gx, Ex, Ib");
|
||||
nextop = F8;
|
||||
|
@ -196,6 +196,27 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VECTOR_LOAD_VMASK(0b0001, x4, 1);
|
||||
VFSQRT_V(v0, v1, VECTOR_MASKED);
|
||||
break;
|
||||
case 0x52:
|
||||
INST_NAME("RSQRTSS Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
if (MODREG) {
|
||||
GETGX_vector(v0, 1, VECTOR_SEW32);
|
||||
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
|
||||
} else {
|
||||
SMREAD();
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
|
||||
LWU(x4, ed, fixedaddress);
|
||||
VMV_S_X(v1, x4);
|
||||
GETGX_vector(v0, 1, VECTOR_SEW32);
|
||||
}
|
||||
LUI(x4, 0x3f800);
|
||||
FMVWX(v1, x4); // 1.0f
|
||||
VECTOR_LOAD_VMASK(0b0001, x4, 1);
|
||||
VFSQRT_V(v0, v1, VECTOR_MASKED);
|
||||
VFRDIV_VF(v0, v0, v1, VECTOR_MASKED);
|
||||
break;
|
||||
case 0x53:
|
||||
INST_NAME("RCPSS Gx, Ex");
|
||||
nextop = F8;
|
||||
@ -301,22 +322,19 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
}
|
||||
break;
|
||||
case 0x5B:
|
||||
if (!box64_dynarec_fastround) return 0;
|
||||
INST_NAME("CVTTPS2DQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
GETEX_vector(v1, 0, 0, VECTOR_SEW32);
|
||||
GETGX_empty_vector(v0);
|
||||
if (box64_dynarec_fastround) {
|
||||
if (rv64_xtheadvector) {
|
||||
ADDI(x4, xZR, 1); // RTZ
|
||||
FSRM(x4, x4);
|
||||
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
FSRM(xZR, x4);
|
||||
} else {
|
||||
VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
}
|
||||
if (rv64_xtheadvector) {
|
||||
ADDI(x4, xZR, 1); // RTZ
|
||||
FSRM(x4, x4);
|
||||
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
FSRM(xZR, x4);
|
||||
} else {
|
||||
return 0;
|
||||
VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
|
||||
}
|
||||
break;
|
||||
case 0x5C:
|
||||
|
Loading…
x
Reference in New Issue
Block a user