[RV64_DYNAREC] Added more opcodes for vector (#1970)

This commit is contained in:
Yang Liu 2024-10-29 03:16:47 +08:00 committed by GitHub
parent 8dee79d380
commit aebfd61539
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 266 additions and 31 deletions

View File

@ -733,6 +733,26 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (q0 != q1) VMV_V_V(q0, q1);
}
break;
case 0x22:
INST_NAME("PINSRD Gx, Ed, Ib");
nextop = F8;
// With REX.W this is PINSRQ: 64-bit elements and a 1-bit element index;
// otherwise 32-bit elements with a 2-bit element index.
SET_ELEMENT_WIDTH(x1, rex.w ? VECTOR_SEW64 : VECTOR_SEW32, 1);
GETGX_vector(q0, 1, dyn->vector_eew);
if (MODREG) {
// Register source: the scalar comes straight from the GPR.
u8 = (F8) & (rex.w ? 1 : 3);
ed = xRAX + (nextop & 7) + (rex.b << 3);
} else {
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 1);
// Immediate byte is fetched AFTER the ModRM addressing bytes.
u8 = (F8) & (rex.w ? 1 : 3);
LDxw(x4, ed, fixedaddress);
ed = x4;
}
// One-hot mask selects only destination element u8, so vmerge inserts the
// scalar there and keeps every other element of Gx unchanged.
VECTOR_LOAD_VMASK((1 << u8), x5, 1);
v0 = fpu_get_scratch(dyn);
VMERGE_VXM(v0, q0, ed); // uses VMASK
VMV_V_V(q0, v0);
break;
default: DEFAULT_VECTOR;
}
break;

View File

@ -33,7 +33,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
uint8_t u8;
uint64_t u64, j64;
int v0, v1;
int q0;
int q0, q1;
int d0, d1;
int s0, s1;
int64_t fixedaddress, gdoffset;
@ -224,6 +224,46 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
}
break;
case 0x58:
INST_NAME("ADDSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LD(x4, ed, fixedaddress);
// Place the 64-bit memory operand into element 0 of the scratch vector.
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked add on element 0 only; element 1 of Gx is undisturbed.
VECTOR_LOAD_VMASK(0b01, x4, 1);
VFADD_VV(v0, v0, v1, VECTOR_MASKED);
} else {
// Precise-NaN path: compute in scalar FPU registers so the NaN result can be
// adjusted to x86 behavior. NOTE: v0/v1 are reused as FPU scratch indices here
// (vector and FPU scratch numbering appear to overlap — confirm against fpu_get_scratch).
VFMV_F_S(v0, v0);
VFMV_F_S(v1, v1);
FEQD(x3, v0, v0); // x3 = 1 if v0 is not NaN
FEQD(x4, v1, v1); // x4 = 1 if v1 is not NaN
FADDD(v0, v0, v1);
AND(x3, x3, x4);
BEQZ_MARK(x3); // an input was NaN: keep the computed result as-is
FEQD(x3, v0, v0);
BNEZ_MARK(x3); // result is not NaN: nothing to fix up
// Both inputs were numeric but the result is NaN (e.g. +inf + -inf):
// flip the sign, presumably to match the NaN sign x86 generates — TODO confirm.
FNEGD(v0, v0);
MARK;
if (rv64_xtheadvector) {
// xtheadvector lacks a scalar-insert that leaves other elements untouched,
// so go through a scratch vector + masked merge on element 0.
d0 = fpu_get_scratch(dyn);
VFMV_S_F(d0, v0);
VECTOR_LOAD_VMASK(0b01, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, v0);
}
}
break;
case 0x59:
INST_NAME("MULSD Gx, Ex");
nextop = F8;
@ -264,6 +304,89 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
}
break;
case 0x5C:
INST_NAME("SUBSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LD(x4, ed, fixedaddress);
// Place the 64-bit memory operand into element 0 of the scratch vector.
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked subtract on element 0 only; element 1 of Gx is undisturbed.
VECTOR_LOAD_VMASK(0b01, x4, 1);
VFSUB_VV(v0, v0, v1, VECTOR_MASKED);
} else {
// Precise-NaN path, same scheme as ADDSD above: compute in scalar FPU regs,
// then fix the NaN sign when a NaN was produced from numeric inputs.
VFMV_F_S(v0, v0);
VFMV_F_S(v1, v1);
FEQD(x3, v0, v0); // x3 = 1 if v0 is not NaN
FEQD(x4, v1, v1); // x4 = 1 if v1 is not NaN
FSUBD(v0, v0, v1);
AND(x3, x3, x4);
BEQZ_MARK(x3); // an input was NaN: keep the computed result as-is
FEQD(x3, v0, v0);
BNEZ_MARK(x3); // result is not NaN: nothing to fix up
// NaN generated from numeric inputs (e.g. inf - inf): flip the sign,
// presumably to match x86's generated NaN — TODO confirm.
FNEGD(v0, v0);
MARK;
if (rv64_xtheadvector) {
// xtheadvector: write back via scratch vector + masked merge on element 0.
d0 = fpu_get_scratch(dyn);
VFMV_S_F(d0, v0);
VECTOR_LOAD_VMASK(0b01, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, v0);
}
}
break;
case 0x5D:
INST_NAME("MINSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LD(x4, ed, fixedaddress);
// Place the 64-bit memory operand into element 0 of the scratch vector.
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked vfmin on element 0, then merge back so element 1
// of Gx is preserved.
q0 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b01, x4, 1);
VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
VMERGE_VVM(v0, v0, q0);
} else {
// Precise path implementing x86 MINSD: if either operand is NaN, or the
// operands compare "src2 <= src1", the result is src2 (d1).
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
VFMV_F_S(d0, v0);
VFMV_F_S(d1, v1);
FEQD(x2, d0, d0); // x2 = 1 if d0 is not NaN
FEQD(x3, d1, d1); // x3 = 1 if d1 is not NaN
AND(x2, x2, x3);
BEQ_MARK(x2, xZR); // any NaN -> take src2
FLED(x2, d1, d0); // x2 = 1 if d1 <= d0
BEQ_MARK2(x2, xZR); // d0 strictly smaller -> keep d0
MARK;
FMVD(d0, d1); // result = src2
MARK2;
if (rv64_xtheadvector) {
// xtheadvector: write back via scratch vector + masked merge on element 0.
VFMV_S_F(d0, d0);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, d0);
}
}
break;
case 0x5E:
INST_NAME("DIVSD Gx, Ex");
nextop = F8;
@ -304,6 +427,49 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
}
break;
case 0x5F:
INST_NAME("MAXSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LD(x4, ed, fixedaddress);
// Place the 64-bit memory operand into element 0 of the scratch vector.
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked vfmax on element 0, then merge back so element 1
// of Gx is preserved.
q0 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b01, x4, 1);
// BUGFIX: was VFMIN_VV — MAXSD must take the maximum (the non-fastnan
// path below and the x86 semantics both compute a max).
VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
VMERGE_VVM(v0, v0, q0);
} else {
// Precise path implementing x86 MAXSD: if either operand is NaN, or the
// operands compare "src1 <= src2", the result is src2 (d1).
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
VFMV_F_S(d0, v0);
VFMV_F_S(d1, v1);
FEQD(x2, d0, d0); // x2 = 1 if d0 is not NaN
FEQD(x3, d1, d1); // x3 = 1 if d1 is not NaN
AND(x2, x2, x3);
BEQ_MARK(x2, xZR); // any NaN -> take src2
FLED(x2, d0, d1); // x2 = 1 if d0 <= d1
BEQ_MARK2(x2, xZR); // d0 strictly greater -> keep d0
MARK;
FMVD(d0, d1); // result = src2
MARK2;
if (rv64_xtheadvector) {
// xtheadvector: write back via scratch vector + masked merge on element 0.
VFMV_S_F(d0, d0);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, d0);
}
}
break;
case 0xC2:
INST_NAME("CMPSD Gx, Ex, Ib");
nextop = F8;

View File

@ -204,24 +204,36 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
q0 = fpu_get_scratch(dyn);
q1 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMV_V_V(q1, VMASK);
VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
VMAND_MM(d0, d0, d1);
VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
VMANDN_MM(VMASK, VMASK, d0);
VMERGE_VVM(v0, v0, v1);
VMAND_MM(VMASK, q1, d0);
VMERGE_VVM(v0, v0, q0);
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked vfmin on element 0 (single precision), then merge
// back so elements 1..3 of Gx are preserved.
q0 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
VMERGE_VVM(v0, v0, q0);
} else {
// Precise path implementing x86 MINSS: if either operand is NaN, or the
// operands compare "src2 <= src1", the result is src2 (d1).
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
VFMV_F_S(d0, v0);
VFMV_F_S(d1, v1);
FEQS(x2, d0, d0); // x2 = 1 if d0 is not NaN
FEQS(x3, d1, d1); // x3 = 1 if d1 is not NaN
AND(x2, x2, x3);
BEQ_MARK(x2, xZR); // any NaN -> take src2
FLES(x2, d1, d0); // x2 = 1 if d1 <= d0
BEQ_MARK2(x2, xZR); // d0 strictly smaller -> keep d0
MARK;
FMVS(d0, d1); // result = src2
MARK2;
if (rv64_xtheadvector) {
// xtheadvector: write back via scratch vector + masked merge on element 0.
VFMV_S_F(d0, d0);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, d0);
}
}
break;
case 0x5F:
INST_NAME("MAXSS Gx, Ex");
@ -235,24 +247,36 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
v1 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
q0 = fpu_get_scratch(dyn);
q1 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMV_V_V(q1, VMASK);
VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
VMAND_MM(d0, d0, d1);
VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
VMANDN_MM(VMASK, VMASK, d0);
VMERGE_VVM(v0, v0, v1);
VMAND_MM(VMASK, q1, d0);
VMERGE_VVM(v0, v0, q0);
if (box64_dynarec_fastnan) {
// Fast-NaN path: masked vfmax on element 0 (single precision), then merge
// back so elements 1..3 of Gx are preserved.
q0 = fpu_get_scratch(dyn);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
// BUGFIX: was VFMIN_VV — MAXSS must take the maximum (the code this diff
// replaced used VFMAX_VV here, and the non-fastnan path below computes a max).
VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
VMERGE_VVM(v0, v0, q0);
} else {
// Precise path implementing x86 MAXSS: if either operand is NaN, or the
// operands compare "src1 <= src2", the result is src2 (d1).
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
VFMV_F_S(d0, v0);
VFMV_F_S(d1, v1);
FEQS(x2, d0, d0); // x2 = 1 if d0 is not NaN
FEQS(x3, d1, d1); // x3 = 1 if d1 is not NaN
AND(x2, x2, x3);
BEQ_MARK(x2, xZR); // any NaN -> take src2
FLES(x2, d0, d1); // x2 = 1 if d0 <= d1
BEQ_MARK2(x2, xZR); // d0 strictly greater -> keep d0
MARK;
FMVS(d0, d1); // result = src2
MARK2;
if (rv64_xtheadvector) {
// xtheadvector: write back via scratch vector + masked merge on element 0.
VFMV_S_F(d0, d0);
VECTOR_LOAD_VMASK(0b0001, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VFMV_S_F(v0, d0);
}
}
break;
case 0xAE:
case 0xB8:

View File

@ -2840,11 +2840,36 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
ADDI(s1, xZR, 1);
VMV_S_X(vreg, s1);
return;
case 0b0010:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x100000000ULL);
VMV_S_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
case 0b0100: {
int scratch = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
VMV_V_I(scratch, 1);
VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
}
case 0b0101:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
VMV_V_I(vreg, 1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
case 0b1000: {
int scratch = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x100000000ULL);
VMV_V_X(scratch, s1);
VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
}
case 0b1010:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x100000000ULL);