[RV64_DYNAREC] Added more opcodes for vector (#1981)

This commit is contained in:
Yang Liu 2024-10-30 20:10:47 +08:00 committed by GitHub
parent 74acad3628
commit adb423d96b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 109 additions and 20 deletions

View File

@ -120,6 +120,29 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
}
break;
case 0x13:
    // MOVLPS Ex, Gx: copy the low 64 bits of Gx into Ex (register or memory).
    // Only element 0 of the destination is written; with a register
    // destination the upper 64-bit element of Ex is left as it was.
    INST_NAME("MOVLPS Ex, Gx");
    nextop = F8;
    GETG;
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
    // Gx is the source. NOTE(review): the 5th argument (0 here, 1 for Ex below)
    // is presumably the "for write" flag of sse_get_reg_vector -- confirm.
    v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, VECTOR_SEW64);
    if (MODREG) {
        // Register destination: Ex must be fetched into v1, because v1 is the
        // register the merge/move below writes. The data flows v0 (Gx) -> v1 (Ex),
        // matching the memory branch, which also stores element 0 of v0.
        ed = (nextop & 7) + (rex.b << 3);
        v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64);
        if (rv64_xtheadvector) {
            // Mask 0b01 selects element 0 only: v1[0] = v0[0], v1[1] stays v1[1].
            VECTOR_LOAD_VMASK(0b01, x4, 1);
            VMERGE_VVM(v1, v1, v0); // implies VMASK
        } else {
            // Bounce element 0 through a scalar register: v1[0] = v0[0].
            VMV_X_S(x4, v0);
            VMV_S_X(v1, x4);
        }
    } else {
        // Memory destination: extract element 0 of Gx and store it as 64 bits.
        VMV_X_S(x4, v0);
        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
        SD(x4, ed, fixedaddress);
        SMWRITE2();
    }
    break;
case 0x14:
INST_NAME("UNPCKLPS Gx, Ex");
nextop = F8;
@ -251,6 +274,21 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
SMWRITE2();
}
break;
case 0x2B:
// MOVNTPS Ex, Gx: copy the whole Gx vector to Ex (register or memory).
// The code below emits an ordinary vector move / vector store for it.
INST_NAME("MOVNTPS Ex, Gx");
nextop = F8;
// Any element width is acceptable; whatever width is current is reused below
// through dyn->vector_eew.
SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
GETGX_vector(v0, 0, dyn->vector_eew);
if (MODREG) {
// Register destination: get Ex without loading its old value, then copy v0 into it.
ed = (nextop & 7) + (rex.b << 3);
v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
VMV_V_V(v1, v0);
} else {
// Memory destination: resolve the effective address, then store v0 unmasked.
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
SMWRITE2();
}
break;
case 0x50:
INST_NAME("MOVMSKPS Gd, Ex");
nextop = F8;
@ -286,6 +324,18 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
GETGX_empty_vector(v1);
VFSQRT_V(v1, v0, VECTOR_UNMASKED);
break;
case 0x52:
// RSQRTPS Gx, Ex: per 32-bit lane, Gx = 1.0f / sqrt(Ex).
// Only emitted when box64_dynarec_fastround is set; otherwise this returns 0.
// NOTE(review): returning 0 presumably hands the opcode to a non-vector
// implementation -- confirm against the caller.
if (!box64_dynarec_fastround) return 0;
INST_NAME("RSQRTPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
GETGX_empty_vector(v1);
// Build 1.0f: LUI places 0x3f800 in the upper bits of x4 (0x3f800000).
LUI(x4, 0x3f800);
// The scalar FP register with the same index as v0 receives the constant.
// NOTE(review): this relies on the FP and vector register files being
// distinct, so the Ex data held in vector v0 is not clobbered -- confirm.
FMVWX(v0, x4); // 1.0f
// v1 = sqrt(Ex), then v1 = 1.0f / v1 (reverse divide by the scalar).
VFSQRT_V(v1, v0, VECTOR_UNMASKED);
VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED);
break;
case 0x53:
INST_NAME("RCPPS Gx, Ex");
nextop = F8;
@ -293,9 +343,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
GETGX_empty_vector(v1);
LUI(x4, 0x3f800);
d0 = fpu_get_scratch(dyn);
FMVWX(d0, x4); // 1.0f
VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
FMVWX(v0, x4); // 1.0f
VFRDIV_VF(v1, v0, v0, VECTOR_UNMASKED);
break;
case 0x54:
INST_NAME("ANDPS Gx, Ex");
@ -502,6 +551,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
case 0x40 ... 0x4F:
case 0x60 ... 0x7F:
case 0x80 ... 0xBF:
case 0xC3 ... 0xC5:
case 0xC8 ... 0xCF:
return 0;
default:

View File

@ -897,18 +897,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0x5B:
if (!box64_dynarec_fastround) return 0;
INST_NAME("CVTPS2DQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(v1, 0, 0, VECTOR_SEW32);
GETGX_empty_vector(v0);
if (box64_dynarec_fastround) {
u8 = sse_setround(dyn, ninst, x6, x4);
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
x87_restoreround(dyn, ninst, u8);
} else {
return 0;
}
u8 = sse_setround(dyn, ninst, x6, x4);
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
x87_restoreround(dyn, ninst, u8);
break;
case 0x5C:
INST_NAME("SUBPD Gx, Ex");

View File

@ -36,6 +36,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
int q0, q1;
int d0, d1;
int s0, s1;
uint64_t tmp64u0, tmp64u1;
int64_t fixedaddress, gdoffset;
int unscaled;
@ -466,6 +467,29 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
}
break;
case 0x70:
// PSHUFLW Gx, Ex, Ib: shuffle the four low 16-bit words of Ex according to
// the 2-bit fields of the immediate, and copy the four high words unchanged.
// Implemented as one VRGATHER with an 8-entry 16-bit index vector built in d0.
INST_NAME("PSHUFLW Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETEX_vector(v1, 0, 1, VECTOR_SEW16);
GETGX_vector(v0, 1, VECTOR_SEW16);
u8 = F8; // shuffle-control immediate
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
// Temporarily switch to 64-bit elements so each half of the index vector
// can be written as a single scalar.
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
// Indices for the high half: words 4,5,6,7 (identity mapping).
tmp64u0 = 0x0007000600050004ULL;
MOV64x(x5, tmp64u0);
VMV_S_X(d1, x5);
// Indices for the low half: each 2-bit field of u8 becomes one 16-bit index (0..3).
tmp64u0 = ((((uint64_t)u8 >> 6) & 3) << 48) | ((((uint64_t)u8 >> 4) & 3) << 32) | (((u8 >> 2) & 3) << 16) | (u8 & 3);
MOV64x(x5, tmp64u0);
// d0[0] = low-half indices (x5), d0[1] = d1[0] = high-half indices.
VSLIDE1UP_VX(d0, d1, x5, VECTOR_UNMASKED);
// Back to 16-bit elements for the gather itself.
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
if (v0 == v1) {
// Source and destination are the same register: gather from a scratch copy instead.
v1 = fpu_get_scratch(dyn);
VMV_V_V(v1, v0);
}
// v0[i] = v1[d0[i]] for every 16-bit element.
VRGATHER_VV(v0, v1, d0, VECTOR_UNMASKED);
break;
case 0xC2:
INST_NAME("CMPSD Gx, Ex, Ib");
nextop = F8;

View File

@ -196,6 +196,27 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VFSQRT_V(v0, v1, VECTOR_MASKED);
break;
case 0x52:
// RSQRTSS Gx, Ex: element 0 of Gx = 1.0f / sqrt(element 0 of Ex).
// Both vector operations are masked to element 0 (mask 0b0001).
INST_NAME("RSQRTSS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
// Register source: use the Ex vector register directly.
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
// Memory source: load the 32-bit operand into element 0 of a scratch vector.
SMREAD();
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
// Build 1.0f: LUI places 0x3f800 in the upper bits of x4 (0x3f800000).
LUI(x4, 0x3f800);
// The scalar FP register with the same index as v1 receives the constant.
// NOTE(review): this relies on the FP and vector register files being
// distinct, so the operand held in vector v1 is not clobbered -- confirm.
FMVWX(v1, x4); // 1.0f
VECTOR_LOAD_VMASK(0b0001, x4, 1);
// v0[0] = sqrt(v1[0]), then v0[0] = 1.0f / v0[0] (reverse divide by the scalar).
VFSQRT_V(v0, v1, VECTOR_MASKED);
VFRDIV_VF(v0, v0, v1, VECTOR_MASKED);
break;
case 0x53:
INST_NAME("RCPSS Gx, Ex");
nextop = F8;
@ -301,22 +322,19 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0x5B:
if (!box64_dynarec_fastround) return 0;
INST_NAME("CVTTPS2DQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(v1, 0, 0, VECTOR_SEW32);
GETGX_empty_vector(v0);
if (box64_dynarec_fastround) {
if (rv64_xtheadvector) {
ADDI(x4, xZR, 1); // RTZ
FSRM(x4, x4);
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
FSRM(xZR, x4);
} else {
VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
}
if (rv64_xtheadvector) {
ADDI(x4, xZR, 1); // RTZ
FSRM(x4, x4);
VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
FSRM(xZR, x4);
} else {
return 0;
VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
}
break;
case 0x5C: