[RV64_DYNAREC] Added preliminary xtheadvector support (#1892)

* [RV64_DYNAREC] Added preliminary xtheadvector support
* [RV64_DYNAREC] Fixed more unaligned issues

This commit is contained in:
parent a188f4ebfe
commit c43d34d0cf

20 src/core.c
@@ -112,7 +112,8 @@ int rv64_zba = 0;
 int rv64_zbb = 0;
 int rv64_zbc = 0;
 int rv64_zbs = 0;
-int rv64_vector = 0;
+int rv64_vector = 0; // rvv 1.0 or xtheadvector
+int rv64_xtheadvector = 0;
 int rv64_vlen = 0;
 int rv64_xtheadba = 0;
 int rv64_xtheadbb = 0;
@@ -516,6 +517,7 @@ HWCAP2_AFP
 if (p != NULL && !strcasecmp(p, "vector")) {
     RV64_Detect_Function();
     rv64_vector = 0;
+    rv64_xtheadvector = 0;
 }
 printf_log(LOG_INFO, "Dynarec for RISC-V ");
 printf_log(LOG_INFO, "With extension: I M A F D C");
@@ -523,16 +525,18 @@ HWCAP2_AFP
 if(rv64_zbb) printf_log(LOG_INFO, " Zbb");
 if(rv64_zbc) printf_log(LOG_INFO, " Zbc");
 if(rv64_zbs) printf_log(LOG_INFO, " Zbs");
-if (rv64_vector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
+if (rv64_vector && !rv64_xtheadvector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
+if (rv64_xtheadvector) printf_log(LOG_INFO, " XTheadVector (vlen: %d)", rv64_vlen);
 if(rv64_xtheadba) printf_log(LOG_INFO, " XTheadBa");
 if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb");
 if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs");
-if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
-if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
-if(rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
-if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
-if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");
-if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv");
+if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
+// Disable the display since these are only detected but never used.
+// if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
+// if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
+// if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
+// if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");
+// if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv");
 #else
 #error Unsupported architecture
 #endif
@@ -98,20 +98,44 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     nextop = F8;
     if (MODREG) {
         INST_NAME("MOVHLPS Gx, Ex");
-        SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
-        GETGX_vector(v0, 1, dyn->vector_eew);
-        GETEX_vector(v1, 0, 0, VECTOR_SEW64);
+        if (MODREG) {
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW64);
+        } else {
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned!
+            GETGX_vector(v0, 1, VECTOR_SEW8);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+        }
         q0 = fpu_get_scratch(dyn);
         VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED);
-        VMV_X_S(x4, q0);
-        VMV_S_X(v0, x4);
+        if (rv64_xtheadvector) {
+            vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+            VMERGE_VVM(v0, v0, q0); // implies VMASK
+        } else {
+            VMV_X_S(x4, q0);
+            VMV_S_X(v0, x4);
+        }
     } else {
         INST_NAME("MOVLPS Gx, Ex");
-        SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
-        GETGX_vector(v0, 1, VECTOR_SEW64);
-        GETEX_vector(v1, 0, 0, VECTOR_SEW64);
-        VMV_X_S(x4, v1);
-        VMV_S_X(v0, x4);
+        if (MODREG) {
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW64);
+        } else {
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned!
+            GETGX_vector(v0, 1, VECTOR_SEW8);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+        }
+        if (rv64_xtheadvector) {
+            vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+            VMERGE_VVM(v0, v0, v1); // implies VMASK
+        } else {
+            VMV_X_S(x4, v1);
+            VMV_S_X(v0, x4);
+        }
     }
     break;
 case 0x16:
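The "// unaligned!" dance above is the second half of the commit message; a sketch of the pattern (helper names as used throughout this diff, rationale inferred from the comment):

    // Presumably the target may fault on element-misaligned vector accesses,
    // so a memory operand is fetched with SEW=8 (byte accesses are always
    // aligned), then vtype is switched back to SEW=64 for the data movement.
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);  // unaligned-safe element width
    GETEX_vector(v1, 0, 0, VECTOR_SEW8);    // load Ex with EEW = 8
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); // 64-bit elements for the ops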
@@ -134,8 +158,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
     v1 = fpu_get_scratch(dyn);
-    MOV64x(x4, 0xFF);
-    VMV_S_X(VMASK, x4);
+    vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
     VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
     VSLIDEUP_VI(v0, v1, 8, VECTOR_UNMASKED);
 }
@@ -150,8 +173,13 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
     q0 = fpu_get_scratch(dyn);
     VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
-    VMV_X_S(x4, q0);
-    VMV_S_X(v1, x4);
+    if (rv64_xtheadvector) {
+        vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+        VMERGE_VVM(v1, v1, q0); // implies VMASK
+    } else {
+        VMV_X_S(x4, q0);
+        VMV_S_X(v1, x4);
+    }
 } else {
     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
     q0 = fpu_get_scratch(dyn);
@@ -209,6 +237,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     }
     break;
 case 0xC6:
+    if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
+
     INST_NAME("SHUFPS Gx, Ex, Ib");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
@@ -99,14 +99,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 } else {
     q0 = fpu_get_scratch(dyn);
     VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
-    VMV_V_I(VMASK, 0b10);
+    vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
     VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
 }
 break;
 case 0x15:
-    INST_NAME("PUNPCKHQDQ Gx, Ex");
+    INST_NAME("UNPCKHPD Gx, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
     // GX->q[0] = GX->q[1];
@@ -114,14 +114,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 GETGX_vector(v0, 1, VECTOR_SEW64);
 if (MODREG) {
     v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
-    q0 == fpu_get_scratch(dyn);
+    q0 = fpu_get_scratch(dyn);
     VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
-    VMV_X_S(x4, q0);
-    if (v0 != v1) { VMV_V_V(v0, v1); }
-    VMV_S_X(v0, x4);
+    if (rv64_xtheadvector) {
+        vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+        VMERGE_VVM(v0, v1, q0); // implies VMASK
+    } else {
+        if (v0 != v1) { VMV_V_V(v0, v1); }
+        VMV_X_S(x4, q0);
+        VMV_S_X(v0, x4);
+    }
 } else {
     q0 = fpu_get_scratch(dyn);
-    VMV_V_I(VMASK, 0b10);
+    vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
     VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
@@ -197,8 +202,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 if (q1 & 1) VMV_V_V(d1, q1);
 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
 VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED);
-MOV64x(x4, 0b0101010101010101);
-VMV_S_X(VMASK, x4);
+vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
 VCOMPRESS_VM(d0, v0, VMASK);
 VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
 VCOMPRESS_VM(d1, v0, VMASK);
@@ -219,8 +223,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 if (q1 & 1) VMV_V_V(d1, q1);
 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
 VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 4, VECTOR_UNMASKED);
-MOV64x(x4, 0b01010101);
-VMV_S_X(VMASK, x4);
+vector_loadmask(dyn, ninst, VMASK, 0b01010101, x4, 2);
 VCOMPRESS_VM(d0, v0, VMASK);
 VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
 VCOMPRESS_VM(d1, v0, VMASK);
@@ -238,8 +241,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
 VWMULSU_VV(v0, q1, q0, VECTOR_UNMASKED);
 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
-MOV64x(x4, 0b0101010101010101);
-VMV_S_X(VMASK, x4);
+vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
 VCOMPRESS_VM(d0, v0, VMASK);
 VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
 VCOMPRESS_VM(d1, v0, VMASK);
@@ -307,6 +309,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     VADD_VX(q0, q1, xZR, VECTOR_MASKED);
     break;
 case 0x17:
+    if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
+
     INST_NAME("PTEST Gx, Ex");
     nextop = F8;
     SETFLAGS(X_ALL, SF_SET);
@@ -613,6 +617,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 opcode = F8;
 switch (opcode) {
 case 0x0E:
+    if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
+
     INST_NAME("PBLENDW Gx, Ex, Ib");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@@ -668,6 +674,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     }
     break;
 case 0x50:
+    if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
+
     INST_NAME("PMOVMSKD Gd, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -848,8 +856,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKLBW Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-MOV64x(x1, 0b1010101010101010);
-VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010
+vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0
 GETGX_vector(q0, 1, VECTOR_SEW8);
@@ -864,8 +871,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKLWD Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
-ADDI(x1, xZR, 0b10101010);
-VMV_V_X(VMASK, x1); // VMASK = 0b10101010
+vector_loadmask(dyn, ninst, VMASK, 0b10101010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 3 3 2 2 1 1 0 0
 GETGX_vector(q0, 1, VECTOR_SEW16);
@@ -880,8 +886,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKLDQ Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
-ADDI(x1, xZR, 0b1010);
-VMV_V_X(VMASK, x1); // VMASK = 0b1010
+vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 1 1 0 0
 GETGX_vector(q0, 1, VECTOR_SEW32);
@@ -961,8 +966,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKHBW Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-ADDI(x1, xZR, 0b1010101010101010);
-VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010
+vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
 VADD_VI(v0, v0, 8, VECTOR_UNMASKED); // v0 = 15 15 14 14 13 13 12 12 11 11 10 10 9 9 8 8
@@ -970,8 +974,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKHWD Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
-ADDI(x1, xZR, 0b10101010);
-VMV_V_X(VMASK, x1); // VMASK = 0b10101010
+vector_loadmask(dyn, ninst, VMASK, 0b10101010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
 VADD_VI(v0, v0, 4, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4
@@ -979,7 +982,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 INST_NAME("PUNPCKHDQ Gx, Ex");
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
-VMV_V_I(VMASK, 0b1010);
+vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1);
 v0 = fpu_get_scratch(dyn);
 VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
 VADD_VI(v0, v0, 2, VECTOR_UNMASKED); // v0 = 3 3 2 2
@@ -1029,7 +1032,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 } else {
     q0 = fpu_get_scratch(dyn);
     VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
-    VMV_V_I(VMASK, 0b10);
+    vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
    VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
@@ -1044,14 +1047,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 GETGX_vector(v0, 1, VECTOR_SEW64);
 if (MODREG) {
     v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
-    q0 == fpu_get_scratch(dyn);
+    q0 = fpu_get_scratch(dyn);
     VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED);
-    VMV_X_S(x4, q0);
-    if (v0 != v1) { VMV_V_V(v0, v1); }
-    VMV_S_X(v0, x4);
+    if (rv64_xtheadvector) {
+        vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+        VMERGE_VVM(v0, v1, q0); // implies VMASK
+    } else {
+        if (v0 != v1) { VMV_V_V(v0, v1); }
+        VMV_X_S(x4, q0);
+        VMV_S_X(v0, x4);
+    }
 } else {
     q0 = fpu_get_scratch(dyn);
-    VMV_V_I(VMASK, 0b10);
+    vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1);
     VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
@@ -1069,7 +1077,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1);
 }
 VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
-VMV_V_I(VMASK, 1);
+vector_loadmask(dyn, ninst, VMASK, 1, x4, 1);
 VMERGE_VXM(v0, v0, ed);
 break;
 case 0x6F:
@@ -1088,6 +1096,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     }
     break;
 case 0x70:
+    if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
+
     INST_NAME("PSHUFD Gx, Ex, Ib");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -1316,6 +1326,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     break;
 case 0xA3 ... 0xC1: return 0;
 case 0xC4:
+    if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
+
     INST_NAME("PINSRW Gx, Ed, Ib");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@@ -1377,7 +1389,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 if (MODREG) {
     q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
 } else {
-    VMV_V_I(VMASK, 0b01);
+    vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
     q1 = fpu_get_scratch(dyn);
@@ -1417,7 +1429,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 if (MODREG) {
     q1 = sse_get_reg_empty_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3));
     VMV_X_S(x4, q0);
-    VXOR_VV(q1, q1, q1, VECTOR_UNMASKED);
+    if (!rv64_xtheadvector) {
+        VXOR_VV(q1, q1, q1, VECTOR_UNMASKED);
+    }
     VMV_S_X(q1, x4);
 } else {
     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
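// Why the new branch: th.vmv.s.x clears elements 1..VLMAX-1 on its own (see
// the warning next to VMV_S_X in the emitter below), so the explicit VXOR_VV
// zeroing is only needed on RVV 1.0, where vmv.s.x leaves the tail undisturbed.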
@@ -1427,6 +1441,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     }
     break;
 case 0xD7:
+    if (rv64_xtheadvector) return 0; // TODO: VMASK conversion
+
     INST_NAME("PMOVMSKB Gd, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@@ -1503,6 +1519,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     VAND_VV(q0, q1, q0, VECTOR_UNMASKED);
     break;
 case 0xE0:
+    if (rv64_xtheadvector) return 0; // lack of vaaddu.vv
+
     INST_NAME("PAVGB Gx, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@@ -1516,7 +1534,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
 GETGX_vector(q0, 1, VECTOR_SEW64);
-VMV_V_I(VMASK, 0b01);
+vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
 if (MODREG) {
     q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
 } else {
@@ -1537,7 +1555,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 nextop = F8;
 SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
 GETGX_vector(q0, 1, VECTOR_SEW64);
-VMV_V_I(VMASK, 0b01);
+vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
 if (MODREG) {
     q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
 } else {
@@ -1554,6 +1572,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     VSRA_VX(q0, q0, x4, VECTOR_UNMASKED);
     break;
 case 0xE3:
+    if (rv64_xtheadvector) return 0; // lack of vaaddu.vv
+
     INST_NAME("PAVGW Gx, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@@ -1672,7 +1692,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 if (MODREG) {
     q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
 } else {
-    VMV_V_I(VMASK, 0b01);
+    vector_loadmask(dyn, ninst, VMASK, 1, x1, 1);
     SMREAD();
     addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
     q1 = fpu_get_scratch(dyn);
@@ -1688,6 +1708,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     VSLL_VX(q0, q0, x4, VECTOR_UNMASKED);
     break;
 case 0xF5:
+    if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv
+
     INST_NAME("PMADDWD Gx, Ex");
     nextop = F8;
     SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@@ -1722,9 +1744,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
 VSRA_VI(v1, v0, 15, VECTOR_UNMASKED);
 VXOR_VV(v0, v1, v0, VECTOR_UNMASKED);
 VSUB_VV(v1, v0, v1, VECTOR_UNMASKED);
-ADDI(x4, xZR, 0xFF);
-VXOR_VV(VMASK, VMASK, VMASK, VECTOR_UNMASKED);
-VMV_S_X(VMASK, x4);
+vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 2);
 VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
 VREDSUM_VS(v0, v1, v0, VECTOR_MASKED); // sum low 64
 VSLIDEDOWN_VI(d0, v1, 8, VECTOR_UNMASKED);
@@ -2606,17 +2606,11 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple)
 {
     if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
     if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
-    /* mu: mask undisturbed
-     * tu: tail undisturbed
-     * sew: selected element width
-     * lmul: vector register group multiplier
-     *
-     *                     mu          tu         sew     lmul */
-    uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul;
-    uint32_t vl = (int)((float)(16 >> sew) * multiple);
 
+    uint32_t vl = (int)((float)(16 >> sew) * multiple);
+    uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul;
     if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) {
-        if (vl <= 31) {
+        if (vl <= 31 && !rv64_xtheadvector) {
             VSETIVLI(xZR, vl, vtypei);
         } else {
             ADDI(s1, xZR, vl);
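The folded shift is the whole vtype-layout difference in one expression; a sketch of what it encodes (RVV 1.0 layout per the spec, the XTheadVector layout being an assumption read off this shift):

    // RVV 1.0 vtype:      vma[7] vta[6] vsew[5:3] vlmul[2:0] -> sew << 3
    // XTheadVector vtype:  vediv[6:5]   vsew[4:2] vlmul[1:0] -> sew << 2
    // e.g. SEW=64 (0b011), LMUL=1 (0b000):
    //   RVV 1.0:      vtypei = 0b011 << 3 = 0b011000
    //   XTheadVector: vtypei = 0b011 << 2 = 0b01100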
@@ -2625,5 +2619,96 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f
     }
     dyn->inst_sew = sew;
     dyn->inst_vl = vl;
+    dyn->inst_vlmul = vlmul;
     return sew;
 }
+
+void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple)
+{
+#if STEP > 1
+    uint8_t sew = dyn->inst_sew;
+    uint8_t vlmul = dyn->inst_vlmul;
+    if (rv64_xtheadvector) {
+        if (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL1) {
+            switch (imm) {
+                case 0:
+                    VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                    return;
+                case 1:
+                    ADDI(s1, xZR, 1);
+                    VMV_S_X(vreg, s1);
+                    return;
+                case 2:
+                    int scratch = fpu_get_scratch(dyn);
+                    VMV_V_I(scratch, 1);
+                    VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
+                    return;
+                case 3:
+                    VMV_V_I(vreg, 1);
+                    return;
+                default: abort();
+            }
+        } else if ((sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL2)) {
+            switch (imm) {
+                case 0b0001:
+                    ADDI(s1, xZR, 1);
+                    VMV_S_X(vreg, s1);
+                    return;
+                case 0b1010:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000000ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                default: abort();
+            }
+        } else if ((sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL2)) {
+            switch (imm) {
+                case 0b01010101:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                case 0b10101010:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x1000000010000ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                default: abort();
+            }
+        } else if ((sew == VECTOR_SEW8 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL2)) {
+            switch (imm) {
+                case 0b0000000011111111:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0xFFFFFFFFFFFFFFFFULL);
+                    VMV_S_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                case 0b0101010101010101:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x0001000100010001ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                case 0b1010101010101010:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x0100010001000100ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                default: abort();
+            }
+        } else
+            abort();
+    } else {
+        if (imm <= 0xF && (dyn->vector_eew == VECTOR_SEW32 || dyn->vector_eew == VECTOR_SEW64)) {
+            VMV_V_I(vreg, imm);
+        } else {
+            MOV64x(s1, imm);
+            VMV_V_X(vreg, s1);
+        }
+    }
+#endif
+}
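A usage sketch of the new helper, mirroring the substitutions made in the 0F/660F handlers above (values taken from this diff):

    // Before (RVV 1.0 only): write the mask bits straight into VMASK.
    MOV64x(x4, 0xFF);
    VMV_S_X(VMASK, x4);
    // After (both ISAs): the helper materializes the same mask, temporarily
    // widening vtype on xtheadvector where a plain vmv.s.x is not usable.
    vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);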
@@ -1292,7 +1292,8 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define rv64_move64 STEPNAME(rv64_move64)
 #define rv64_move32 STEPNAME(rv64_move32)
 
 #define vector_vsetvli STEPNAME(vector_vsetvli)
+#define vector_loadmask STEPNAME(vector_loadmask)
 
 /* setup r2 to address pointed by */
 uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
@@ -1449,6 +1450,7 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
 
 int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple);
+void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple);
 
 #if STEP < 2
 #define CHECK_CACHE() 0
@@ -30,7 +30,8 @@
     dyn->e.olds[i].v = 0; \
     dyn->insts[ninst].f_entry = dyn->f; \
     dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
-    dyn->inst_sew = VECTOR_SEWNA; \
+    dyn->inst_sew = dyn->vector_sew; \
+    dyn->inst_vlmul = VECTOR_LMUL1; \
     dyn->inst_vl = 0; \
     if (ninst) \
         dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr;
@@ -8,7 +8,8 @@
     for (int i = 0; i < 16; ++i) \
         dyn->e.olds[i].v = 0; \
     dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
-    dyn->inst_sew = VECTOR_SEWNA; \
+    dyn->inst_sew = dyn->vector_sew; \
+    dyn->inst_vlmul = VECTOR_LMUL1; \
     dyn->inst_vl = 0; \
     dyn->e.swapped = 0; \
     dyn->e.barrier = 0
@@ -9,7 +9,8 @@
 #define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
 #define NEW_INST \
     dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \
-    dyn->inst_sew = VECTOR_SEWNA; \
+    dyn->inst_sew = dyn->vector_sew; \
+    dyn->inst_vlmul = VECTOR_LMUL1; \
     dyn->inst_vl = 0; \
     if (ninst) { \
         dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \
@@ -14,7 +14,8 @@
 #define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
 #define NEW_INST \
     dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \
-    dyn->inst_sew = VECTOR_SEWNA; \
+    dyn->inst_sew = dyn->vector_sew; \
+    dyn->inst_vlmul = VECTOR_LMUL1; \
     dyn->inst_vl = 0; \
     if (box64_dynarec_dump) print_newinst(dyn, ninst); \
     if (ninst) { \
@@ -154,10 +154,11 @@ typedef struct dynarec_rv64_s {
     uint16_t ymm_zero; // bitmap of ymm to zero at purge
     uint8_t always_test;
     uint8_t abort;
-    uint8_t vector_sew; // current sew status
-    uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
-    uint8_t inst_sew;   // sew inside current instruction, for vsetvli elimination
-    uint8_t inst_vl;    // vl inside current instruction, for vsetvli elimination
+    uint8_t vector_sew; // current sew status
+    uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
+    uint8_t inst_sew;   // sew inside current instruction, for vsetvli elimination
+    uint8_t inst_vl;    // vl inside current instruction, for vsetvli elimination
+    uint8_t inst_vlmul; // vlmul inside current instruction
 } dynarec_rv64_t;
 
 // v0 is hardware wired to vector mask register, which should be always reserved
@@ -1224,6 +1224,26 @@ f28–31 ft8–11 FP temporaries Caller
 
 // Vector extension emitter
 
+/* Warning: mind the differences between RVV 1.0 and XTheadVector!
+ *
+ * - Different encoding of vsetvl/th.vsetvl.
+ * - No vsetivli instruction.
+ * - Cannot configure vta and vma in the vsetvl instruction; the fixed value is TAMU.
+ * - No whole-register move instructions.
+ * - No fractional lmul.
+ * - Different load/store instructions.
+ * - Different names for the vector indexed instructions.
+ * - The destination vector register cannot overlap the source vector register group for vmadc/vmsbc/widening arithmetic/narrowing arithmetic.
+ * - No vlm/vsm instructions.
+ * - Different vnsrl/vnsra/vfncvt suffixes (vv/vx/vi vs. wv/wx/wi).
+ * - Different size of the mask mode (1.0 uses vl, xtheadvector uses vlen).
+ * - No vrgatherei16.vv instruction.
+ * - Different encoding of the vmv.s.x instruction.
+ *
+ * We ignore all the naming differences and use the RVV 1.0 naming convention.
+ */
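The emitter below copes with these differences through one recurring pattern: select the funct field at emission time off the rv64_xtheadvector flag. A minimal sketch (VFOO_V is a made-up name; the two funct7 values are borrowed from the VFCVT family further down):

    #define VFOO_V(vd, vs2, vm)                                         \
        EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), \
                    vs2, 0b00000, 0b001, vd, 0b1010111))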
 
 #define VECTOR_SEW8 0b000
 #define VECTOR_SEW16 0b001
 #define VECTOR_SEW32 0b010
@@ -1277,15 +1297,16 @@ f28–31 ft8–11 FP temporaries Caller
 
 // Vector Indexed-Unordered Instructions (including segment part)
 // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions
+// Note: Make sure SEW in vtype is always the same as EEW, for xtheadvector compatibility!
 
-#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111
-#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111
-#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111
-#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111
-#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111
-#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111
-#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111
-#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111
+#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111
+#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111
+#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111
+#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111
+#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111
+#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111
+#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111
+#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111
 
 // Vector Strided Instructions (including segment part)
 // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions
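Concretely, the note means callers set vtype to the index EEW right before the access, which is what the 660F handlers above do:

    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);                // vtype SEW = 64...
    VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); // ...matches EEW = 64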
@@ -1420,31 +1441,32 @@ f28–31 ft8–11 FP temporaries Caller
 #define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111
 #define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111
 
-#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111
-#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111
-#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111
-#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111
-#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111
-#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111
-#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111
-#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111
-#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111
-#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111
-#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111
-#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111
-#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111
-#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111
-#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111
-#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111
-#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111
-#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111
-#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111
-#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111
-#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111
-#define VFSQRT_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111
+#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111
+#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111
+#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111
+#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111
+#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111
+#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111
+#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111
+#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111
+#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111
+#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111
+#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111
+#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111
+#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111
+#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111
+#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111
+#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111
+#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111
+#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111
+#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111
+#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111
+#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111
+#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111
+#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111
 
 #define VFRSQRT7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00100, 0b001, vd, 0b1010111)) // 010011......00100001.....1010111
 #define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111
-#define VFCLASS_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111
 
 #define VFWADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110000...........001.....1010111
 #define VFWREDUSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110001...........001.....1010111
@@ -1473,10 +1495,10 @@ f28–31 ft8–11 FP temporaries Caller
 #define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111
 #define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111
 
-#define VADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100000, vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111
+#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111
 #define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111
 #define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111
-#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100100, vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111
+#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111
 #define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111
 #define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111
 #define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111
@@ -1519,10 +1541,10 @@ f28–31 ft8–11 FP temporaries Caller
 #define VRGATHER_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111
 #define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111
 
-#define VADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100000, vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111
+#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111
 #define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111
 #define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111
-#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100100, vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111
+#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111
 #define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111
 #define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111
 #define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111
@@ -1562,7 +1584,7 @@ f28–31 ft8–11 FP temporaries Caller
 #define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111
 #define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111
 
-#define VADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100000, vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111
+#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111
 #define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111
 #define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111
 #define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111
@@ -1602,12 +1624,13 @@ f28–31 ft8–11 FP temporaries Caller
 #define VREDMIN_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000101...........010.....1010111
 #define VREDMAXU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000110...........010.....1010111
 #define VREDMAX_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000111...........010.....1010111
-#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
 #define VAADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001001...........010.....1010111
-#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
 #define VASUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001011...........010.....1010111
+// Warning, no unsigned edition in Xtheadvector
+#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
+#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
 
-#define VMV_X_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
+#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
 
 // Vector Integer Extension Instructions
 // https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension
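// Callers must honor that warning: the PAVGB/PAVGW handlers above bail out
// with "if (rv64_xtheadvector) return 0;" instead of emitting VAADDU_VV.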
@@ -1629,12 +1652,12 @@ f28–31 ft8–11 FP temporaries Caller
 #define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111
 #define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111
 
-#define VMSBF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111
-#define VMSOF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111
-#define VMSIF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111
-#define VIOTA_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111
-#define VCPOP_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
-#define VFIRST_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
+#define VMSBF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111
+#define VMSOF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111
+#define VMSIF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111
+#define VIOTA_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111
+#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
+#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
 
 #define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
 
@@ -1673,7 +1696,8 @@ f28–31 ft8–11 FP temporaries Caller
 #define VSLIDE1UP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001110...........110.....1010111
 #define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111
 
-#define VMV_S_X(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b110, vd, 0b1010111)) // 010000100000.....110.....1010111
+// Warning, upper elements will be cleared in xtheadvector!
+#define VMV_S_X(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111))
 
 #define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111
 #define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111
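That cleared-tail behavior is exactly why the opcode handlers above avoid VMV_S_X when merging into a live register on xtheadvector; the recurring workaround (lifted from the MOVHLPS hunk) is:

    if (rv64_xtheadvector) {
        vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); // mask = element 0 only
        VMERGE_VVM(v0, v0, q0);  // v0[0] = q0[0], upper elements preserved
    } else {
        VMV_X_S(x4, q0);
        VMV_S_X(v0, x4);         // tail stays undisturbed on RVV 1.0 (tu)
    }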
@@ -362,6 +362,37 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr)
     }
 }
 
+if (rv64_xtheadvector) {
+    /* These are written by hand.... */
+
+    // rv_v, VSETVLI
+    if ((opcode & 0x8000707f) == 0x7057) {
+        a.imm = FX(opcode, 30, 20);
+        a.rs1 = FX(opcode, 19, 15);
+        a.rd = FX(opcode, 11, 7);
+        const char *lmul_str, *sew_str;
+        switch (a.imm & 0b11) {
+            case 0b00: lmul_str = "m1"; break;
+            case 0b01: lmul_str = "m2"; break;
+            case 0b10: lmul_str = "m4"; break;
+            case 0b11: lmul_str = "m8"; break;
+            default: lmul_str = "reserved"; break;
+        }
+        switch ((a.imm & 0b0011100) >> 2) {
+            case 0b000: sew_str = "e8"; break;
+            case 0b001: sew_str = "e16"; break;
+            case 0b010: sew_str = "e32"; break;
+            case 0b011: sew_str = "e64"; break;
+            default: sew_str = "reserved"; break;
+        }
+
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %s", "VSETVLI", gpr[a.rd], gpr[a.rs1], sew_str, lmul_str);
+        return buff;
+    }
+
+    // TODO: add more...
+}
+
 /****************
  * Generated by https://github.com/ksco/riscv-opcodes/tree/box64_printer
  * Command: python parse.py -box64 rv_a rv_d rv_f rv_i rv_m rv_v rv_zba rv_zbb rv_zbc rv_zicsr rv_zbs rv64_a rv64_d rv64_f rv64_i rv64_m rv64_zba rv64_zbb rv64_zbs > code.c
@@ -56,6 +56,7 @@ extern int rv64_zbb;
 extern int rv64_zbc;
 extern int rv64_zbs;
 extern int rv64_vector;
+extern int rv64_xtheadvector; // rvv 1.0 or xtheadvector
 extern int rv64_vlen;
 extern int rv64_xtheadba;
 extern int rv64_xtheadbb;
@@ -68,12 +68,18 @@ void RV64_Detect_Function()
 BR(xRA);
 rv64_zbs = Check(my_block);
 
 // Test Vector v1.0 with CSRR zero, vcsr
 block = (uint32_t*)my_block;
-CSRRS(xZR, xZR, 0x00f);
+CSRRS(xZR, xZR, 0xc22 /* vlenb */);
 BR(xRA);
 rv64_vector = Check(my_block);
 
+if (rv64_vector) {
+    block = (uint32_t*)my_block;
+    CSRRS(xZR, xZR, 0x00f /* vcsr */); // vcsr does not exist in xtheadvector
+    BR(xRA);
+    rv64_xtheadvector = !Check(my_block);
+}
+
 if (rv64_vector) {
     int vlenb = 0;
     asm volatile("csrr %0, 0xc22" : "=r"(vlenb));
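The probe logic in short (as implied by the two CSR reads above):

    // vlenb (0xc22) exists in both RVV 1.0 and XTheadVector, so a
    // non-trapping read means some vector unit is present; vcsr (0x00f)
    // exists only in RVV 1.0, so a read that traps singles out XTheadVector.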