[RV64_DYNAREC] Added preliminary xtheadvector support (#1892)

* [RV64_DYNAREC] Added preliminary xtheadvector support

* [RV64_DYNAREC] Fixed more unaligned issues
Yang Liu 2024-10-02 15:53:29 +08:00 committed by GitHub
parent a188f4ebfe
commit c43d34d0cf
14 changed files with 335 additions and 127 deletions


@ -112,7 +112,8 @@ int rv64_zba = 0;
int rv64_zbb = 0;
int rv64_zbc = 0;
int rv64_zbs = 0;
int rv64_vector = 0;
int rv64_vector = 0; // rvv 1.0 or xtheadvector
int rv64_xtheadvector = 0;
int rv64_vlen = 0;
int rv64_xtheadba = 0;
int rv64_xtheadbb = 0;
@ -516,6 +517,7 @@ HWCAP2_AFP
if (p != NULL && !strcasecmp(p, "vector")) {
RV64_Detect_Function();
rv64_vector = 0;
rv64_xtheadvector = 0;
}
printf_log(LOG_INFO, "Dynarec for RISC-V ");
printf_log(LOG_INFO, "With extension: I M A F D C");
@ -523,16 +525,18 @@ HWCAP2_AFP
if(rv64_zbb) printf_log(LOG_INFO, " Zbb");
if(rv64_zbc) printf_log(LOG_INFO, " Zbc");
if(rv64_zbs) printf_log(LOG_INFO, " Zbs");
if (rv64_vector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
if (rv64_vector && !rv64_xtheadvector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
if (rv64_xtheadvector) printf_log(LOG_INFO, " XTheadVector (vlen: %d)", rv64_vlen);
if(rv64_xtheadba) printf_log(LOG_INFO, " XTheadBa");
if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb");
if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs");
if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
if(rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");
if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv");
if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
// Disable the display since these are only detected but never used.
// if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
// if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
// if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
// if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");
// if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv");
#else
#error Unsupported architecture
#endif
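For orientation, a minimal sketch of how later code is expected to branch on these two flags (my reading of the declarations above, not text from the commit): rv64_vector is set for both RVV 1.0 and XTheadVector hardware, and rv64_xtheadvector narrows the choice.

// Sketch only: the two detection flags as the dynarec consumes them.
if (rv64_vector) {
    if (rv64_xtheadvector) {
        /* emit XTheadVector-compatible sequences (restricted feature set) */
    } else {
        /* emit plain RVV 1.0 sequences */
    }
} else {
    /* no vector dynarec; scalar fallback only */
}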


@ -98,20 +98,44 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
nextop = F8;
if (MODREG) {
INST_NAME("MOVHLPS Gx, Ex");
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, dyn->vector_eew);
GETEX_vector(v1, 0, 0, VECTOR_SEW64);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
GETEX_vector(v1, 0, 0, VECTOR_SEW64);
} else {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned!
GETGX_vector(v0, 1, VECTOR_SEW8);
GETEX_vector(v1, 0, 0, VECTOR_SEW8);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
}
q0 = fpu_get_scratch(dyn);
VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED);
VMV_X_S(x4, q0);
VMV_S_X(v0, x4);
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v0, v0, q0); // implies VMASK
} else {
VMV_X_S(x4, q0);
VMV_S_X(v0, x4);
}
} else {
INST_NAME("MOVLPS Gx, Ex");
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
GETEX_vector(v1, 0, 0, VECTOR_SEW64);
VMV_X_S(x4, v1);
VMV_S_X(v0, x4);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
GETEX_vector(v1, 0, 0, VECTOR_SEW64);
} else {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned!
GETGX_vector(v0, 1, VECTOR_SEW8);
GETEX_vector(v1, 0, 0, VECTOR_SEW8);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
}
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v0, v0, v1); // implies VMASK
} else {
VMV_X_S(x4, v1);
VMV_S_X(v0, x4);
}
}
break;
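An editorial aside on the pattern just above (inference from this diff, not commit text): XTheadVector's vmv.s.x clears the elements above element 0 (see the warning added next to VMV_S_X later in this commit), so the RVV 1.0 scalar round-trip through x4 cannot be used to patch element 0 in place. The replacement idiom builds a one-element mask and merges instead:

if (rv64_xtheadvector) {
    vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); // VMASK selects element 0 only
    VMERGE_VVM(v0, v0, q0);                          // v0[0] = q0[0], remaining elements kept from v0
} else {
    VMV_X_S(x4, q0);                                 // x4 = q0[0]
    VMV_S_X(v0, x4);                                 // RVV 1.0: writes v0[0], leaves the tail untouched
}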
case 0x16:
@ -134,8 +158,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
v1 = fpu_get_scratch(dyn);
MOV64x(x4, 0xFF);
VMV_S_X(VMASK, x4);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
VSLIDEUP_VI(v0, v1, 8, VECTOR_UNMASKED);
}
@ -150,8 +173,13 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
q0 = fpu_get_scratch(dyn);
VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
VMV_X_S(x4, q0);
VMV_S_X(v1, x4);
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v1, v1, q0); // implies VMASK
} else {
VMV_X_S(x4, q0);
VMV_S_X(v1, x4);
}
} else {
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
q0 = fpu_get_scratch(dyn);
@ -209,6 +237,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
}
break;
case 0xC6:
if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
INST_NAME("SHUFPS Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);


@ -99,14 +99,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
} else {
q0 = fpu_get_scratch(dyn);
VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
VMV_V_I(VMASK, 0b10);
vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
}
break;
case 0x15:
INST_NAME("PUNPCKHQDQ Gx, Ex");
INST_NAME("UNPCKHPD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
// GX->q[0] = GX->q[1];
@ -114,14 +114,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETGX_vector(v0, 1, VECTOR_SEW64);
if (MODREG) {
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
q0 == fpu_get_scratch(dyn);
q0 = fpu_get_scratch(dyn);
VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
VMV_X_S(x4, q0);
if (v0 != v1) { VMV_V_V(v0, v1); }
VMV_S_X(v0, x4);
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v0, v1, q0); // implies VMASK
} else {
if (v0 != v1) { VMV_V_V(v0, v1); }
VMV_X_S(x4, q0);
VMV_S_X(v0, x4);
}
} else {
q0 = fpu_get_scratch(dyn);
VMV_V_I(VMASK, 0b10);
vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
@ -197,8 +202,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (q1 & 1) VMV_V_V(d1, q1);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED);
MOV64x(x4, 0b0101010101010101);
VMV_S_X(VMASK, x4);
vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
VCOMPRESS_VM(d0, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VCOMPRESS_VM(d1, v0, VMASK);
@ -219,8 +223,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (q1 & 1) VMV_V_V(d1, q1);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 4, VECTOR_UNMASKED);
MOV64x(x4, 0b01010101);
VMV_S_X(VMASK, x4);
vector_loadmask(dyn, ninst, VMASK, 0b01010101, x4, 2);
VCOMPRESS_VM(d0, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VCOMPRESS_VM(d1, v0, VMASK);
@ -238,8 +241,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
VWMULSU_VV(v0, q1, q0, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
MOV64x(x4, 0b0101010101010101);
VMV_S_X(VMASK, x4);
vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
VCOMPRESS_VM(d0, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VCOMPRESS_VM(d1, v0, VMASK);
@ -307,6 +309,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
break;
case 0x17:
if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
INST_NAME("PTEST Gx, Ex");
nextop = F8;
SETFLAGS(X_ALL, SF_SET);
@ -613,6 +617,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
opcode = F8;
switch (opcode) {
case 0x0E:
if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
INST_NAME("PBLENDW Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@ -668,6 +674,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0x50:
if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
INST_NAME("PMOVMSKD Gd, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@ -848,8 +856,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKLBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
MOV64x(x1, 0b1010101010101010);
VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010
vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0
GETGX_vector(q0, 1, VECTOR_SEW8);
@ -864,8 +871,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKLWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
ADDI(x1, xZR, 0b10101010);
VMV_V_X(VMASK, x1); // VMASK = 0b10101010
vector_loadmask(dyn, ninst, VMASK, 0b10101010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 3 3 2 2 1 1 0 0
GETGX_vector(q0, 1, VECTOR_SEW16);
@ -880,8 +886,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKLDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
ADDI(x1, xZR, 0b1010);
VMV_V_X(VMASK, x1); // VMASK = 0b1010
vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 1 1 0 0
GETGX_vector(q0, 1, VECTOR_SEW32);
@ -961,8 +966,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKHBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
ADDI(x1, xZR, 0b1010101010101010);
VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010
vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
VADD_VI(v0, v0, 8, VECTOR_UNMASKED); // v0 = 15 15 14 14 13 13 12 12 11 11 10 10 9 9 8 8
@ -970,8 +974,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKHWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
ADDI(x1, xZR, 0b10101010);
VMV_V_X(VMASK, x1); // VMASK = 0b10101010
vector_loadmask(dyn, ninst, VMASK, 0b10101010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
VADD_VI(v0, v0, 4, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4
@ -979,7 +982,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("PUNPCKHDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VMV_V_I(VMASK, 0b1010);
vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1);
v0 = fpu_get_scratch(dyn);
VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
VADD_VI(v0, v0, 2, VECTOR_UNMASKED); // v0 = 3 3 2 2
@ -1029,7 +1032,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
} else {
q0 = fpu_get_scratch(dyn);
VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
VMV_V_I(VMASK, 0b10);
vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
@ -1044,14 +1047,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETGX_vector(v0, 1, VECTOR_SEW64);
if (MODREG) {
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
q0 == fpu_get_scratch(dyn);
q0 = fpu_get_scratch(dyn);
VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
VMV_X_S(x4, q0);
if (v0 != v1) { VMV_V_V(v0, v1); }
VMV_S_X(v0, x4);
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v0, v1, q0); // implies VMASK
} else {
if (v0 != v1) { VMV_V_V(v0, v1); }
VMV_X_S(x4, q0);
VMV_S_X(v0, x4);
}
} else {
q0 = fpu_get_scratch(dyn);
VMV_V_I(VMASK, 0b10);
vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
@ -1069,7 +1077,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1);
}
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
VMV_V_I(VMASK, 1);
vector_loadmask(dyn, ninst, VMASK, 1, x4, 1);
VMERGE_VXM(v0, v0, ed);
break;
case 0x6F:
@ -1088,6 +1096,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0x70:
if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
INST_NAME("PSHUFD Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@ -1316,6 +1326,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
break;
case 0xA3 ... 0xC1: return 0;
case 0xC4:
if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
INST_NAME("PINSRW Gx, Ed, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@ -1377,7 +1389,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (MODREG) {
q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
VMV_V_I(VMASK, 0b01);
vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
q1 = fpu_get_scratch(dyn);
@ -1417,7 +1429,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (MODREG) {
q1 = sse_get_reg_empty_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3));
VMV_X_S(x4, q0);
VXOR_VV(q1, q1, q1, VECTOR_UNMASKED);
if (!rv64_xtheadvector) {
VXOR_VV(q1, q1, q1, VECTOR_UNMASKED);
}
VMV_S_X(q1, x4);
} else {
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
@ -1427,6 +1441,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0xD7:
if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
INST_NAME("PMOVMSKB Gd, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -1503,6 +1519,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VAND_VV(q0, q1, q0, VECTOR_UNMASKED);
break;
case 0xE0:
if (rv64_xtheadvector) return 0; // lack of vaaddu.vv
INST_NAME("PAVGB Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -1516,7 +1534,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(q0, 1, VECTOR_SEW64);
VMV_V_I(VMASK, 0b01);
vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
if (MODREG) {
q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
@ -1537,7 +1555,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(q0, 1, VECTOR_SEW64);
VMV_V_I(VMASK, 0b01);
vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
if (MODREG) {
q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
@ -1554,6 +1572,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSRA_VX(q0, q0, x4, VECTOR_UNMASKED);
break;
case 0xE3:
if (rv64_xtheadvector) return 0; // lack of vaaddu.vv
INST_NAME("PAVGW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@ -1672,7 +1692,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (MODREG) {
q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
VMV_V_I(VMASK, 0b01);
vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
q1 = fpu_get_scratch(dyn);
@ -1688,6 +1708,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSLL_VX(q0, q0, x4, VECTOR_UNMASKED);
break;
case 0xF5:
if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
INST_NAME("PMADDWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@ -1722,9 +1744,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSRA_VI(v1, v0, 15, VECTOR_UNMASKED);
VXOR_VV(v0, v1, v0, VECTOR_UNMASKED);
VSUB_VV(v1, v0, v1, VECTOR_UNMASKED);
ADDI(x4, xZR, 0xFF);
VXOR_VV(VMASK, VMASK, VMASK, VECTOR_UNMASKED);
VMV_S_X(VMASK, x4);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 2);
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
VREDSUM_VS(v0, v1, v0, VECTOR_MASKED); // sum low 64
VSLIDEDOWN_VI(d0, v1, 8, VECTOR_UNMASKED);


@ -2606,17 +2606,11 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f
{
if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
/* mu: mask undisturbed
* tu: tail undisturbed
* sew: selected element width
* lmul: vector register group multiplier
*
* mu tu sew lmul */
uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul;
uint32_t vl = (int)((float)(16 >> sew) * multiple);
uint32_t vl = (int)((float)(16 >> sew) * multiple);
uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul;
if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) {
if (vl <= 31) {
if (vl <= 31 && !rv64_xtheadvector) {
VSETIVLI(xZR, vl, vtypei);
} else {
ADDI(s1, xZR, vl);
@ -2625,5 +2619,96 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f
}
dyn->inst_sew = sew;
dyn->inst_vl = vl;
dyn->inst_vlmul = vlmul;
return sew;
}
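A worked example of the vtypei expression above, under the field layout the shift implies (RVV 1.0: vlmul in bits [2:0], sew in [5:3]; XTheadVector: vlmul in [1:0], sew in [4:2]); the bit positions are my reading, not wording from the commit:

// SEW64 (0b011) with LMUL1 (0b000):
uint32_t vtypei_rvv10  = (VECTOR_SEW64 << 3) | VECTOR_LMUL1; // 0b011000 for RVV 1.0
uint32_t vtypei_xthead = (VECTOR_SEW64 << 2) | VECTOR_LMUL1; // 0b01100 for XTheadVector
// vl = (16 >> sew) * multiple, e.g. 2 elements at SEW64 for one 128-bit XMM register.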
void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple)
{
#if STEP > 1
uint8_t sew = dyn->inst_sew;
uint8_t vlmul = dyn->inst_vlmul;
if (rv64_xtheadvector) {
if (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL1) {
switch (imm) {
case 0:
VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
return;
case 1:
ADDI(s1, xZR, 1);
VMV_S_X(vreg, s1);
return;
case 2:
int scratch = fpu_get_scratch(dyn);
VMV_V_I(scratch, 1);
VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
return;
case 3:
VMV_V_I(vreg, 1);
return;
default: abort();
}
} else if ((sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL2)) {
switch (imm) {
case 0b0001:
ADDI(s1, xZR, 1);
VMV_S_X(vreg, s1);
return;
case 0b1010:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x100000000ULL);
VMV_V_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
default: abort();
}
} else if ((sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL2)) {
switch (imm) {
case 0b01010101:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x100000001ULL);
VMV_V_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
case 0b10101010:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x1000000010000ULL);
VMV_V_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
default: abort();
}
} else if ((sew == VECTOR_SEW8 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL2)) {
switch (imm) {
case 0b0000000011111111:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0xFFFFFFFFFFFFFFFFULL);
VMV_S_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
case 0b0101010101010101:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x0001000100010001ULL);
VMV_V_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
case 0b1010101010101010:
vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
MOV64x(s1, 0x0100010001000100ULL);
VMV_V_X(vreg, s1);
vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
return;
default: abort();
}
} else
abort();
} else {
if (imm <= 0xF && (dyn->vector_eew == VECTOR_SEW32 || dyn->vector_eew == VECTOR_SEW64)) {
VMV_V_I(vreg, imm);
} else {
MOV64x(s1, imm);
VMV_V_X(vreg, s1);
}
}
#endif
}
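A short note on the 64-bit constants above (my reading of the XTheadVector mask layout, roughly the old MLEN = SEW/LMUL bits-per-element rule, not wording from the commit): each logical mask bit occupies a full element-sized field, so the helper temporarily switches to SEW64 and splats a 64-bit pattern whose fields carry the bits. For example, at SEW16/LMUL1:

// Logical mask 0b10101010 (elements 1, 3, 5, 7 active), one 16-bit field per element:
//   element :      3      2      1      0
//   field   : 0x0001 0x0000 0x0001 0x0000   ->  64-bit chunk 0x0001000000010000
// which is exactly the 0x1000000010000ULL splat used in the 0b10101010 case above.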


@ -1292,7 +1292,8 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
#define rv64_move64 STEPNAME(rv64_move64)
#define rv64_move32 STEPNAME(rv64_move32)
#define vector_vsetvli STEPNAME(vector_vsetvli)
#define vector_loadmask STEPNAME(vector_loadmask)
/* setup r2 to address pointed by */
uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
@ -1449,6 +1450,7 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple);
void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple);
#if STEP < 2
#define CHECK_CACHE() 0


@ -30,7 +30,8 @@
dyn->e.olds[i].v = 0; \
dyn->insts[ninst].f_entry = dyn->f; \
dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
dyn->inst_sew = VECTOR_SEWNA; \
dyn->inst_sew = dyn->vector_sew; \
dyn->inst_vlmul = VECTOR_LMUL1; \
dyn->inst_vl = 0; \
if (ninst) \
dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr;
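Why the per-instruction vector state is seeded this way (inference from the uses elsewhere in this diff, not commit text): vector_loadmask() reads dyn->inst_sew and dyn->inst_vlmul to pick a mask constant, so both must already be meaningful before the instruction's first vsetvli, while inst_vl stays 0 so that vector_vsetvli still emits that first vsetvli unconditionally.

dyn->inst_sew   = dyn->vector_sew; // SEW assumed to be in effect at instruction entry
dyn->inst_vlmul = VECTOR_LMUL1;    // default register grouping until vector_vsetvli changes it
dyn->inst_vl    = 0;               // 0 forces the next vector_vsetvli call to emit a real vsetvli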


@ -8,7 +8,8 @@
for (int i = 0; i < 16; ++i) \
dyn->e.olds[i].v = 0; \
dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
dyn->inst_sew = VECTOR_SEWNA; \
dyn->inst_sew = dyn->vector_sew; \
dyn->inst_vlmul = VECTOR_LMUL1; \
dyn->inst_vl = 0; \
dyn->e.swapped = 0; \
dyn->e.barrier = 0


@ -9,7 +9,8 @@
#define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
#define NEW_INST \
dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \
dyn->inst_sew = VECTOR_SEWNA; \
dyn->inst_sew = dyn->vector_sew; \
dyn->inst_vlmul = VECTOR_LMUL1; \
dyn->inst_vl = 0; \
if (ninst) { \
dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \


@ -14,7 +14,8 @@
#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
#define NEW_INST \
dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \
dyn->inst_sew = VECTOR_SEWNA; \
dyn->inst_sew = dyn->vector_sew; \
dyn->inst_vlmul = VECTOR_LMUL1; \
dyn->inst_vl = 0; \
if (box64_dynarec_dump) print_newinst(dyn, ninst); \
if (ninst) { \


@ -154,10 +154,11 @@ typedef struct dynarec_rv64_s {
uint16_t ymm_zero; // bitmap of ymm to zero at purge
uint8_t always_test;
uint8_t abort;
uint8_t vector_sew; // current sew status
uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
uint8_t inst_sew; // sew inside current instruction, for vsetvli elimination
uint8_t inst_vl; // vl inside current instruction, for vsetvli elimination
uint8_t vector_sew; // current sew status
uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
uint8_t inst_sew; // sew inside current instruction, for vsetvli elimination
uint8_t inst_vl; // vl inside current instruction, for vsetvli elimination
uint8_t inst_vlmul; // vlmul inside current instruction
} dynarec_rv64_t;
// v0 is hardware wired to vector mask register, which should be always reserved


@ -1224,6 +1224,26 @@ f2831 ft811 FP temporaries Caller
// Vector extension emitter
/* Warning: mind the differences between RVV 1.0 and XTheadVector!
*
* - Different encoding of vsetvl/th.vsetvl.
* - No vsetivli instruction.
* - Cannot configure vta and vma in the vsetvl instruction; the fixed policy is TAMU.
* - No whole register move instructions.
* - No fractional lmul.
* - Different load/store instructions.
* - Different name of vector indexed instructions.
* - The destination vector register cannot overlap the source vector register group for vmadc/vmsbc and widening/narrowing arithmetic.
* - No vlm/vsm instructions.
* - Different vnsrl/vnsra/vfncvt suffix (vv/vx/vi vs wv/wx/wi).
* - Different mask size (RVV 1.0 masks cover vl bits, XTheadVector masks cover vlen bits).
* - No vrgatherei16.vv instruction.
* - Different encoding of vmv.s.x instruction.
*
* We ignore all the naming differences and use the RVV 1.0 naming convention.
*/
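The emitter macros below absorb these differences by choosing the encoding at emit time based on rv64_xtheadvector, while keeping a single RVV 1.0-style name for callers. The general shape, mirroring the VMV_X_S change further down in this file:

#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111))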
#define VECTOR_SEW8 0b000
#define VECTOR_SEW16 0b001
#define VECTOR_SEW32 0b010
@ -1277,15 +1297,16 @@ f2831 ft811 FP temporaries Caller
// Vector Indexed-Unordered Instructions (including segment part)
// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions
// Note: Make sure SEW in vtype is always the same as EEW, for xtheadvector compatibility!
#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111
#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111
#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111
#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111
#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111
#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111
#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111
#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111
#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111
#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111
#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111
#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111
#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111
#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111
#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111
#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111
// Vector Strided Instructions (including segment part)
// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions
@ -1420,31 +1441,32 @@ f2831 ft811 FP temporaries Caller
#define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111
#define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111
#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111
#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111
#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111
#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111
#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111
#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111
#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111
#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111
#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111
#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111
#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111
#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111
#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111
#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111
#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111
#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111
#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111
#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111
#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111
#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111
#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111
#define VFSQRT_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111
#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111
#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111
#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111
#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111
#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111
#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111
#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111
#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111
#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111
#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111
#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111
#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111
#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111
#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111
#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111
#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111
#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111
#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111
#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111
#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111
#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111
#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111
#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111
#define VFRSQRT7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00100, 0b001, vd, 0b1010111)) // 010011......00100001.....1010111
#define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111
#define VFCLASS_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111
#define VFWADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110000...........001.....1010111
#define VFWREDUSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110001...........001.....1010111
@ -1473,10 +1495,10 @@ f2831 ft811 FP temporaries Caller
#define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111
#define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111
#define VADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100000, vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111
#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111
#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111
#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111
#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100100, vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111
#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111
#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111
#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111
#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111
@ -1519,10 +1541,10 @@ f2831 ft811 FP temporaries Caller
#define VRGATHER_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111
#define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111
#define VADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100000, vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111
#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111
#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111
#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111
#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100100, vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111
#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111
#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111
#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111
#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111
@ -1562,7 +1584,7 @@ f2831 ft811 FP temporaries Caller
#define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111
#define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111
#define VADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100000, vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111
#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111
#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111
#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111
#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111
@ -1602,12 +1624,13 @@ f2831 ft811 FP temporaries Caller
#define VREDMIN_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000101...........010.....1010111
#define VREDMAXU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000110...........010.....1010111
#define VREDMAX_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000111...........010.....1010111
#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
#define VAADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001001...........010.....1010111
#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
#define VASUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001011...........010.....1010111
// Warning: no unsigned variants of these in XTheadVector
#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
#define VMV_X_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
// Vector Integer Extension Instructions
// https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension
@ -1629,12 +1652,12 @@ f2831 ft811 FP temporaries Caller
#define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111
#define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111
#define VMSBF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111
#define VMSOF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111
#define VMSIF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111
#define VIOTA_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111
#define VCPOP_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
#define VFIRST_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
#define VMSBF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111
#define VMSOF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111
#define VMSIF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111
#define VIOTA_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111
#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
#define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
@ -1673,7 +1696,8 @@ f2831 ft811 FP temporaries Caller
#define VSLIDE1UP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001110...........110.....1010111
#define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111
#define VMV_S_X(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b110, vd, 0b1010111)) // 010000100000.....110.....1010111
// Warning, upper elements will be cleared in xtheadvector!
#define VMV_S_X(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111))
#define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111
#define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111


@ -362,6 +362,37 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr)
}
}
if (rv64_xtheadvector) {
/* These are written by hand.... */
// rv_v, VSETVLI
if ((opcode & 0x8000707f) == 0x7057) {
a.imm = FX(opcode, 30, 20);
a.rs1 = FX(opcode, 19, 15);
a.rd = FX(opcode, 11, 7);
const char *lmul_str, *sew_str;
switch (a.imm & 0b11) {
case 0b00: lmul_str = "m1"; break;
case 0b01: lmul_str = "m2"; break;
case 0b10: lmul_str = "m4"; break;
case 0b11: lmul_str = "m8"; break;
default: lmul_str = "reserved"; break;
}
switch ((a.imm & 0b0011100) >> 2) {
case 0b000: sew_str = "e8"; break;
case 0b001: sew_str = "e16"; break;
case 0b010: sew_str = "e32"; break;
case 0b011: sew_str = "e64"; break;
default: sew_str = "reserved"; break;
}
snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %s", "VSETVLI", gpr[a.rd], gpr[a.rs1], sew_str, lmul_str);
return buff;
}
// TODO: add more...
}
/****************
* Generated by https://github.com/ksco/riscv-opcodes/tree/box64_printer
* Command: python parse.py -box64 rv_a rv_d rv_f rv_i rv_m rv_v rv_zba rv_zbb rv_zbc rv_zicsr rv_zbs rv64_a rv64_d rv64_f rv64_i rv64_m rv64_zba rv64_zbb rv64_zbs > code.c


@ -56,6 +56,7 @@ extern int rv64_zbb;
extern int rv64_zbc;
extern int rv64_zbs;
extern int rv64_vector;
extern int rv64_xtheadvector; // rvv 1.0 or xtheadvector
extern int rv64_vlen;
extern int rv64_xtheadba;
extern int rv64_xtheadbb;


@ -68,12 +68,18 @@ void RV64_Detect_Function()
BR(xRA);
rv64_zbs = Check(my_block);
// Test Vector v1.0 with CSRR zero, vcsr
block = (uint32_t*)my_block;
CSRRS(xZR, xZR, 0x00f);
CSRRS(xZR, xZR, 0xc22 /* vlenb */);
BR(xRA);
rv64_vector = Check(my_block);
if (rv64_vector) {
block = (uint32_t*)my_block;
CSRRS(xZR, xZR, 0x00f /* vcsr */); // vcsr does not exist in xtheadvector
BR(xRA);
rv64_xtheadvector = !Check(my_block);
}
if (rv64_vector) {
int vlenb = 0;
asm volatile("csrr %0, 0xc22" : "=r"(vlenb));
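Summarizing the probe sequence above (a paraphrase, not commit text): reading the vlenb CSR (0xc22) succeeds on both RVV 1.0 and XTheadVector hardware, while the vcsr CSR (0x00f) only exists on RVV 1.0, so a second probe separates the two. In pseudo-C, with try_csr_read() as a hypothetical stand-in for the Check(my_block) trap test:

// Hedged sketch of the detection flow; try_csr_read() is illustrative only.
rv64_vector = try_csr_read(0xc22);               /* vlenb present => some vector unit */
if (rv64_vector)
    rv64_xtheadvector = !try_csr_read(0x00f);    /* vcsr missing  => XTheadVector */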