diff --git a/src/core.c b/src/core.c index 3804a577..31cb413d 100644 --- a/src/core.c +++ b/src/core.c @@ -112,7 +112,8 @@ int rv64_zba = 0; int rv64_zbb = 0; int rv64_zbc = 0; int rv64_zbs = 0; -int rv64_vector = 0; +int rv64_vector = 0; // rvv 1.0 or xtheadvector +int rv64_xtheadvector = 0; int rv64_vlen = 0; int rv64_xtheadba = 0; int rv64_xtheadbb = 0; @@ -516,6 +517,7 @@ HWCAP2_AFP if (p != NULL && !strcasecmp(p, "vector")) { RV64_Detect_Function(); rv64_vector = 0; + rv64_xtheadvector = 0; } printf_log(LOG_INFO, "Dynarec for RISC-V "); printf_log(LOG_INFO, "With extension: I M A F D C"); @@ -523,16 +525,18 @@ HWCAP2_AFP if(rv64_zbb) printf_log(LOG_INFO, " Zbb"); if(rv64_zbc) printf_log(LOG_INFO, " Zbc"); if(rv64_zbs) printf_log(LOG_INFO, " Zbs"); - if (rv64_vector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen); + if (rv64_vector && !rv64_xtheadvector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen); + if (rv64_xtheadvector) printf_log(LOG_INFO, " XTheadVector (vlen: %d)", rv64_vlen); if(rv64_xtheadba) printf_log(LOG_INFO, " XTheadBa"); if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb"); if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs"); - if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov"); - if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx"); - if(rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair"); - if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx"); - if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac"); - if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv"); + if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair"); + // Disable the display since these are only detected but never used. + // if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov"); + // if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx"); + // if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx"); + // if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac"); + // if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv"); #else #error Unsupported architecture #endif diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 454fa02a..20cad81c 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -98,20 +98,44 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; if (MODREG) { INST_NAME("MOVHLPS Gx, Ex"); - SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); - GETGX_vector(v0, 1, dyn->vector_eew); - GETEX_vector(v1, 0, 0, VECTOR_SEW64); + if (MODREG) { + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETGX_vector(v0, 1, VECTOR_SEW64); + GETEX_vector(v1, 0, 0, VECTOR_SEW64); + } else { + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned! 
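+                // EEW=8 accesses carry no alignment requirement, so dropping to SEW8 here keeps the
+                // vector load/store of a potentially unaligned x86 operand legal; the element width is
+                // switched back to SEW64 once the operands have been fetched.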
+ GETGX_vector(v0, 1, VECTOR_SEW8); + GETEX_vector(v1, 0, 0, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + } q0 = fpu_get_scratch(dyn); VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED); - VMV_X_S(x4, q0); - VMV_S_X(v0, x4); + if (rv64_xtheadvector) { + vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); + VMERGE_VVM(v0, v0, q0); // implies VMASK + } else { + VMV_X_S(x4, q0); + VMV_S_X(v0, x4); + } } else { INST_NAME("MOVLPS Gx, Ex"); - SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); - GETGX_vector(v0, 1, VECTOR_SEW64); - GETEX_vector(v1, 0, 0, VECTOR_SEW64); - VMV_X_S(x4, v1); - VMV_S_X(v0, x4); + if (MODREG) { + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETGX_vector(v0, 1, VECTOR_SEW64); + GETEX_vector(v1, 0, 0, VECTOR_SEW64); + } else { + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned! + GETGX_vector(v0, 1, VECTOR_SEW8); + GETEX_vector(v1, 0, 0, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + } + if (rv64_xtheadvector) { + vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); + VMERGE_VVM(v0, v0, v1); // implies VMASK + } else { + VMV_X_S(x4, v1); + VMV_S_X(v0, x4); + } } break; case 0x16: @@ -134,8 +158,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); v1 = fpu_get_scratch(dyn); - MOV64x(x4, 0xFF); - VMV_S_X(VMASK, x4); + vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1); VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1); VSLIDEUP_VI(v0, v1, 8, VECTOR_UNMASKED); } @@ -150,8 +173,13 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - VMV_X_S(x4, q0); - VMV_S_X(v1, x4); + if (rv64_xtheadvector) { + vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); + VMERGE_VVM(v1, v1, q0); // implies VMASK + } else { + VMV_X_S(x4, q0); + VMV_S_X(v1, x4); + } } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); q0 = fpu_get_scratch(dyn); @@ -209,6 +237,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } break; case 0xC6: + if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv + INST_NAME("SHUFPS Gx, Ex, Ib"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index d43297d3..3719b5a1 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -99,14 +99,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { q0 = fpu_get_scratch(dyn); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); - VMV_V_I(VMASK, 0b10); + vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); } break; case 0x15: - INST_NAME("PUNPCKHQDQ Gx, Ex"); + INST_NAME("UNPCKHPD Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); // GX->q[0] = GX->q[1]; @@ -114,14 +114,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(v0, 1, VECTOR_SEW64); if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); - q0 == fpu_get_scratch(dyn); + q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - VMV_X_S(x4, q0); - if (v0 
!= v1) { VMV_V_V(v0, v1); } - VMV_S_X(v0, x4); + if (rv64_xtheadvector) { + vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); + VMERGE_VVM(v0, v1, q0); // implies VMASK + } else { + if (v0 != v1) { VMV_V_V(v0, v1); } + VMV_X_S(x4, q0); + VMV_S_X(v0, x4); + } } else { q0 = fpu_get_scratch(dyn); - VMV_V_I(VMASK, 0b10); + vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1); VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); @@ -197,8 +202,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (q1 & 1) VMV_V_V(d1, q1); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED); - MOV64x(x4, 0b0101010101010101); - VMV_S_X(VMASK, x4); + vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2); VCOMPRESS_VM(d0, v0, VMASK); VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); VCOMPRESS_VM(d1, v0, VMASK); @@ -219,8 +223,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (q1 & 1) VMV_V_V(d1, q1); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 4, VECTOR_UNMASKED); - MOV64x(x4, 0b01010101); - VMV_S_X(VMASK, x4); + vector_loadmask(dyn, ninst, VMASK, 0b01010101, x4, 2); VCOMPRESS_VM(d0, v0, VMASK); VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); VCOMPRESS_VM(d1, v0, VMASK); @@ -238,8 +241,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! VWMULSU_VV(v0, q1, q0, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); - MOV64x(x4, 0b0101010101010101); - VMV_S_X(VMASK, x4); + vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2); VCOMPRESS_VM(d0, v0, VMASK); VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); VCOMPRESS_VM(d1, v0, VMASK); @@ -307,6 +309,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VADD_VX(q0, q1, xZR, VECTOR_MASKED); break; case 0x17: + if (rv64_xtheadvector) return 0; // TODO: VMASK convertion + INST_NAME("PTEST Gx, Ex"); nextop = F8; SETFLAGS(X_ALL, SF_SET); @@ -613,6 +617,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i opcode = F8; switch (opcode) { case 0x0E: + if (rv64_xtheadvector) return 0; // TODO: VMASK convertion + INST_NAME("PBLENDW Gx, Ex, Ib"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); @@ -668,6 +674,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } break; case 0x50: + if (rv64_xtheadvector) return 0; // TODO: VMASK convertion + INST_NAME("PMOVMSKD Gd, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); @@ -848,8 +856,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKLBW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - MOV64x(x1, 0b1010101010101010); - VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010 + vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0 GETGX_vector(q0, 1, VECTOR_SEW8); @@ -864,8 +871,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKLWD Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - ADDI(x1, xZR, 0b10101010); - VMV_V_X(VMASK, x1); // VMASK = 0b10101010 + vector_loadmask(dyn, ninst, VMASK, 
0b10101010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 3 3 2 2 1 1 0 0 GETGX_vector(q0, 1, VECTOR_SEW16); @@ -880,8 +886,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKLDQ Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); - ADDI(x1, xZR, 0b1010); - VMV_V_X(VMASK, x1); // VMASK = 0b1010 + vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 1 1 0 0 GETGX_vector(q0, 1, VECTOR_SEW32); @@ -961,8 +966,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKHBW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - ADDI(x1, xZR, 0b1010101010101010); - VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010 + vector_loadmask(dyn, ninst, VMASK, 0b1010101010101010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); VADD_VI(v0, v0, 8, VECTOR_UNMASKED); // v0 = 15 15 14 14 13 13 12 12 11 11 10 10 9 9 8 8 @@ -970,8 +974,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKHWD Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - ADDI(x1, xZR, 0b10101010); - VMV_V_X(VMASK, x1); // VMASK = 0b10101010 + vector_loadmask(dyn, ninst, VMASK, 0b10101010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); VADD_VI(v0, v0, 4, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 @@ -979,7 +982,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("PUNPCKHDQ Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); - VMV_V_I(VMASK, 0b1010); + vector_loadmask(dyn, ninst, VMASK, 0b1010, x1, 1); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); VADD_VI(v0, v0, 2, VECTOR_UNMASKED); // v0 = 3 3 2 2 @@ -1029,7 +1032,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { q0 = fpu_get_scratch(dyn); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); - VMV_V_I(VMASK, 0b10); + vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); @@ -1044,14 +1047,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(v0, 1, VECTOR_SEW64); if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); - q0 == fpu_get_scratch(dyn); + q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - VMV_X_S(x4, q0); - if (v0 != v1) { VMV_V_V(v0, v1); } - VMV_S_X(v0, x4); + if (rv64_xtheadvector) { + vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1); + VMERGE_VVM(v0, v1, q0); // implies VMASK + } else { + if (v0 != v1) { VMV_V_V(v0, v1); } + VMV_X_S(x4, q0); + VMV_S_X(v0, x4); + } } else { q0 = fpu_get_scratch(dyn); - VMV_V_I(VMASK, 0b10); + vector_loadmask(dyn, ninst, VMASK, 0b10, x1, 1); VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); @@ -1069,7 +1077,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); } VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); - VMV_V_I(VMASK, 1); + vector_loadmask(dyn, ninst, VMASK, 1, x4, 1); VMERGE_VXM(v0, v0, ed); break; case 0x6F: @@ -1088,6 +1096,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t 
i } break; case 0x70: + if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv + INST_NAME("PSHUFD Gx, Ex, Ib"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); @@ -1316,6 +1326,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i break; case 0xA3 ... 0xC1: return 0; case 0xC4: + if (rv64_xtheadvector) return 0; // TODO: VMASK convertion + INST_NAME("PINSRW Gx, Ed, Ib"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); @@ -1377,7 +1389,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); } else { - VMV_V_I(VMASK, 0b01); + vector_loadmask(dyn, ninst, VMASK, 1, x1, 1); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); q1 = fpu_get_scratch(dyn); @@ -1417,7 +1429,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { q1 = sse_get_reg_empty_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3)); VMV_X_S(x4, q0); - VXOR_VV(q1, q1, q1, VECTOR_UNMASKED); + if (!rv64_xtheadvector) { + VXOR_VV(q1, q1, q1, VECTOR_UNMASKED); + } VMV_S_X(q1, x4); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); @@ -1427,6 +1441,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } break; case 0xD7: + if (rv64_xtheadvector) return 0; // TODO: VMASK convertion + INST_NAME("PMOVMSKB Gd, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); @@ -1503,6 +1519,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VAND_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xE0: + if (rv64_xtheadvector) return 0; // lack of vaddu.vv + INST_NAME("PAVGB Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); @@ -1516,7 +1534,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 1, VECTOR_SEW64); - VMV_V_I(VMASK, 0b01); + vector_loadmask(dyn, ninst, VMASK, 1, x1, 1); if (MODREG) { q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); } else { @@ -1537,7 +1555,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 1, VECTOR_SEW64); - VMV_V_I(VMASK, 0b01); + vector_loadmask(dyn, ninst, VMASK, 1, x1, 1); if (MODREG) { q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); } else { @@ -1554,6 +1572,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VSRA_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xE3: + if (rv64_xtheadvector) return 0; // lack of vaddu.vv + INST_NAME("PAVGW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); @@ -1672,7 +1692,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); } else { - VMV_V_I(VMASK, 0b01); + vector_loadmask(dyn, ninst, VMASK, 1, x1, 1); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); q1 = fpu_get_scratch(dyn); @@ -1688,6 +1708,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VSLL_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xF5: + if (rv64_xtheadvector) return 0; // lack of vrgatherei16.vv + 
INST_NAME("PMADDWD Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); @@ -1722,9 +1744,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VSRA_VI(v1, v0, 15, VECTOR_UNMASKED); VXOR_VV(v0, v1, v0, VECTOR_UNMASKED); VSUB_VV(v1, v0, v1, VECTOR_UNMASKED); - ADDI(x4, xZR, 0xFF); - VXOR_VV(VMASK, VMASK, VMASK, VECTOR_UNMASKED); - VMV_S_X(VMASK, x4); + vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 2); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VREDSUM_VS(v0, v1, v0, VECTOR_MASKED); // sum low 64 VSLIDEDOWN_VI(d0, v1, 8, VECTOR_UNMASKED); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 975dff34..533a646f 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2606,17 +2606,11 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f { if (sew == VECTOR_SEWNA) return VECTOR_SEW8; if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8; - /* mu: mask undisturbed - * tu: tail undisturbed - * sew: selected element width - * lmul: vector register group multiplier - * - * mu tu sew lmul */ - uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul; - uint32_t vl = (int)((float)(16 >> sew) * multiple); + uint32_t vl = (int)((float)(16 >> sew) * multiple); + uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul; if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) { - if (vl <= 31) { + if (vl <= 31 && !rv64_xtheadvector) { VSETIVLI(xZR, vl, vtypei); } else { ADDI(s1, xZR, vl); @@ -2625,5 +2619,96 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f } dyn->inst_sew = sew; dyn->inst_vl = vl; + dyn->inst_vlmul = vlmul; return sew; } + +void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple) +{ +#if STEP > 1 + uint8_t sew = dyn->inst_sew; + uint8_t vlmul = dyn->inst_vlmul; + if (rv64_xtheadvector) { + if (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL1) { + switch (imm) { + case 0: + VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED); + return; + case 1: + ADDI(s1, xZR, 1); + VMV_S_X(vreg, s1); + return; + case 2: + int scratch = fpu_get_scratch(dyn); + VMV_V_I(scratch, 1); + VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED); + return; + case 3: + VMV_V_I(vreg, 1); + return; + default: abort(); + } + } else if ((sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL2)) { + switch (imm) { + case 0b0001: + ADDI(s1, xZR, 1); + VMV_S_X(vreg, s1); + return; + case 0b1010: + vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0x100000000ULL); + VMV_V_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + default: abort(); + } + } else if ((sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL2)) { + switch (imm) { + case 0b01010101: + vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0x100000001ULL); + VMV_V_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + case 0b10101010: + vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0x1000000010000ULL); + VMV_V_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + default: abort(); + } + } else if ((sew == VECTOR_SEW8 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL2)) { + switch (imm) { + case 0b0000000011111111: + vector_vsetvli(dyn, 
ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0xFFFFFFFFFFFFFFFFULL); + VMV_S_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + case 0b0101010101010101: + vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0x0001000100010001ULL); + VMV_V_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + case 0b1010101010101010: + vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1); + MOV64x(s1, 0x0100010001000100ULL); + VMV_V_X(vreg, s1); + vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple); + return; + default: abort(); + } + } else + abort(); + } else { + if (imm <= 0xF && (dyn->vector_eew == VECTOR_SEW32 || dyn->vector_eew == VECTOR_SEW64)) { + VMV_V_I(vreg, imm); + } else { + MOV64x(s1, imm); + VMV_V_X(vreg, s1); + } + } +#endif +} diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index eedd4a61..d6380ea7 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1292,7 +1292,8 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define rv64_move64 STEPNAME(rv64_move64) #define rv64_move32 STEPNAME(rv64_move32) -#define vector_vsetvli STEPNAME(vector_vsetvli) +#define vector_vsetvli STEPNAME(vector_vsetvli) +#define vector_loadmask STEPNAME(vector_loadmask) /* setup r2 to address pointed by */ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); @@ -1449,6 +1450,7 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup); int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple); +void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int s1, float multiple); #if STEP < 2 #define CHECK_CACHE() 0 diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 4a1603d0..f026548a 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -30,7 +30,8 @@ dyn->e.olds[i].v = 0; \ dyn->insts[ninst].f_entry = dyn->f; \ dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ - dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_sew = dyn->vector_sew; \ + dyn->inst_vlmul = VECTOR_LMUL1; \ dyn->inst_vl = 0; \ if (ninst) \ dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h index 0aa8e010..b488ef86 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass1.h +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -8,7 +8,8 @@ for (int i = 0; i < 16; ++i) \ dyn->e.olds[i].v = 0; \ dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ - dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_sew = dyn->vector_sew; \ + dyn->inst_vlmul = VECTOR_LMUL1; \ dyn->inst_vl = 0; \ dyn->e.swapped = 0; \ dyn->e.barrier = 0 diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h index 71b14b2e..7cfbc724 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass2.h +++ b/src/dynarec/rv64/dynarec_rv64_pass2.h @@ -9,7 +9,8 @@ #define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0) #define NEW_INST \ dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \ - dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_sew = dyn->vector_sew; \ + dyn->inst_vlmul = 
VECTOR_LMUL1; \ dyn->inst_vl = 0; \ if (ninst) { \ dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index 85337a61..d8e7dfa4 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -14,7 +14,8 @@ #define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) #define NEW_INST \ dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \ - dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_sew = dyn->vector_sew; \ + dyn->inst_vlmul = VECTOR_LMUL1; \ dyn->inst_vl = 0; \ if (box64_dynarec_dump) print_newinst(dyn, ninst); \ if (ninst) { \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index ca1d5e96..4f552e6c 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -154,10 +154,11 @@ typedef struct dynarec_rv64_s { uint16_t ymm_zero; // bitmap of ymm to zero at purge uint8_t always_test; uint8_t abort; - uint8_t vector_sew; // current sew status - uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH - uint8_t inst_sew; // sew inside current instruction, for vsetvli elimination - uint8_t inst_vl; // vl inside current instruction, for vsetvli elimination + uint8_t vector_sew; // current sew status + uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH + uint8_t inst_sew; // sew inside current instruction, for vsetvli elimination + uint8_t inst_vl; // vl inside current instruction, for vsetvli elimination + uint8_t inst_vlmul; // vlmul inside current instruction } dynarec_rv64_t; // v0 is hardware wired to vector mask register, which should be always reserved diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index e34c6487..cffa15de 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -1224,6 +1224,26 @@ f28–31 ft8–11 FP temporaries Caller // Vector extension emitter +/* Warning: mind the differences between RVV 1.0 and XTheadVector! + * + * - Different encoding of vsetvl/th.vsetvl. + * - No vsetivli instruction. + * - Cannot configure vta and vma vsetvl instruction, the fixed value is TAMU. + * - No whole register move instructions. + * - No fractional lmul. + * - Different load/store instructions. + * - Different name of vector indexed instructions. + * - Destination vector register cannot overlap source vector register group for vmadc/vmsbc/widen arithmetic/narrow arithmetic. + * - No vlm/vsm instructions. + * - Different vnsrl/vnsra/vfncvt suffix (vv/vx/vi vs wv/wx/wi). + * - Different size of mask mode (1.0 is vl and xtheadvector is vlen). + * - No vrgatherei16.vv instruction. + * - Different encoding of vmv.s.x instruction. + * + * We ignore all the naming differences and use the RVV 1.0 naming convention. + + */ + #define VECTOR_SEW8 0b000 #define VECTOR_SEW16 0b001 #define VECTOR_SEW32 0b010 @@ -1277,15 +1297,16 @@ f28–31 ft8–11 FP temporaries Caller // Vector Indexed-Unordered Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions +// Note: Make sure SEW in vtype is always the same as EEW, for xtheadvector compatibility! 
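+// (XTheadVector, being based on RVV 0.7.1, takes both the index width and the data width of
+// indexed accesses from vtype.SEW instead of encoding a separate index EEW in the instruction,
+// so these EI8/16/32/64 macros only keep their RVV 1.0 semantics when SEW equals that EEW.)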
-#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 -#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 -#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 -#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 -#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 -#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 -#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 -#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 +#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 +#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 +#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 +#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 +#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 +#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 +#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 +#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 
0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 // Vector Strided Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions @@ -1420,31 +1441,32 @@ f28–31 ft8–11 FP temporaries Caller #define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111 #define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111 -#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 -#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 -#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 -#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 -#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 -#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 -#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 -#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 -#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 -#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 -#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 -#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 -#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 -#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 -#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 -#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 -#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 -#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 -#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 -#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 -#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 -#define VFSQRT_V(vd, vs2, vm) EMIT(R_type(0b0100110 | 
(vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 +#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 +#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 +#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 +#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 +#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 +#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 +#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 +#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 +#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 +#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 +#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 +#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 +#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 +#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 +#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 +#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 +#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 +#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 +#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 +#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 +#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 +#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 +#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 + #define VFRSQRT7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00100, 0b001, vd, 0b1010111)) // 010011......00100001.....1010111 #define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111 -#define VFCLASS_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 #define VFWADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110000...........001.....1010111 #define VFWREDUSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110001...........001.....1010111 @@ -1473,10 +1495,10 @@ f28–31 ft8–11 FP temporaries Caller #define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111 #define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111 -#define VADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100000, vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 +#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 #define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 #define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 -#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100100, vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 +#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 #define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 #define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 #define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 @@ -1519,10 +1541,10 @@ f28–31 ft8–11 FP temporaries Caller #define VRGATHER_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111 #define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111 -#define VADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100000, vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 +#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 #define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 #define VMADC_VV(vd, vs2, vs1) 
EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 -#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100100, vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 +#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 #define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 #define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 #define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 @@ -1562,7 +1584,7 @@ f28–31 ft8–11 FP temporaries Caller #define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111 #define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111 -#define VADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100000, vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 +#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 #define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 #define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 #define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111 @@ -1602,12 +1624,13 @@ f28–31 ft8–11 FP temporaries Caller #define VREDMIN_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000101...........010.....1010111 #define VREDMAXU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000110...........010.....1010111 #define VREDMAX_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000111...........010.....1010111 -#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111 #define VAADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001001...........010.....1010111 -#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 #define VASUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001011...........010.....1010111 +// Warning, no unsigned edition in Xtheadvector +#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111 +#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 -#define VMV_X_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 +#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 
0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 // Vector Integer Extension Instructions // https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension @@ -1629,12 +1652,12 @@ f28–31 ft8–11 FP temporaries Caller #define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111 #define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111 -#define VMSBF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 -#define VMSOF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 -#define VMSIF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 -#define VIOTA_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 -#define VCPOP_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 -#define VFIRST_M(rd, vs2, vm) EMIT(R_type(0b0100000 | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 +#define VMSBF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 +#define VMSOF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 +#define VMSIF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 +#define VIOTA_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 +#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 +#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 #define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 @@ -1673,7 +1696,8 @@ f28–31 ft8–11 FP temporaries Caller #define VSLIDE1UP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001110...........110.....1010111 #define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111 -#define VMV_S_X(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b110, vd, 0b1010111)) // 010000100000.....110.....1010111 +// Warning, upper elements will be cleared in xtheadvector! +#define VMV_S_X(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 
0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111)) #define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111 #define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111 diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c index a5e6fc07..3eaf92f2 100644 --- a/src/dynarec/rv64/rv64_printer.c +++ b/src/dynarec/rv64/rv64_printer.c @@ -362,6 +362,37 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr) } } + if (rv64_xtheadvector) { + /* These are written by hand.... */ + + // rv_v, VSETVLI + if ((opcode & 0x8000707f) == 0x7057) { + a.imm = FX(opcode, 30, 20); + a.rs1 = FX(opcode, 19, 15); + a.rd = FX(opcode, 11, 7); + const char *lmul_str, *sew_str; + switch (a.imm & 0b11) { + case 0b00: lmul_str = "m1"; break; + case 0b01: lmul_str = "m2"; break; + case 0b10: lmul_str = "m4"; break; + case 0b11: lmul_str = "m8"; break; + default: lmul_str = "reserved"; break; + } + switch ((a.imm & 0b0011100) >> 2) { + case 0b000: sew_str = "e8"; break; + case 0b001: sew_str = "e16"; break; + case 0b010: sew_str = "e32"; break; + case 0b011: sew_str = "e64"; break; + default: sew_str = "reserved"; break; + } + + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %s", "VSETVLI", gpr[a.rd], gpr[a.rs1], sew_str, lmul_str); + return buff; + } + + // TODO: add more... + } + /**************** * Generated by https://github.com/ksco/riscv-opcodes/tree/box64_printer * Command: python parse.py -box64 rv_a rv_d rv_f rv_i rv_m rv_v rv_zba rv_zbb rv_zbc rv_zicsr rv_zbs rv64_a rv64_d rv64_f rv64_i rv64_m rv64_zba rv64_zbb rv64_zbs > code.c diff --git a/src/include/debug.h b/src/include/debug.h index fd3f23c0..7264261d 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -56,6 +56,7 @@ extern int rv64_zbb; extern int rv64_zbc; extern int rv64_zbs; extern int rv64_vector; +extern int rv64_xtheadvector; // rvv 1.0 or xtheadvector extern int rv64_vlen; extern int rv64_xtheadba; extern int rv64_xtheadbb; diff --git a/src/rv64detect.c b/src/rv64detect.c index 01f36fcc..b0664b05 100644 --- a/src/rv64detect.c +++ b/src/rv64detect.c @@ -68,12 +68,18 @@ void RV64_Detect_Function() BR(xRA); rv64_zbs = Check(my_block); - // Test Vector v1.0 with CSRR zero, vcsr block = (uint32_t*)my_block; - CSRRS(xZR, xZR, 0x00f); + CSRRS(xZR, xZR, 0xc22 /* vlenb */); BR(xRA); rv64_vector = Check(my_block); + if (rv64_vector) { + block = (uint32_t*)my_block; + CSRRS(xZR, xZR, 0x00f /* vcsr */); // vcsr does not exists in xtheadvector + BR(xRA); + rv64_xtheadvector = !Check(my_block); + } + if (rv64_vector) { int vlenb = 0; asm volatile("csrr %0, 0xc22" : "=r"(vlenb));
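As a side note on the vtype packing introduced in vector_vsetvli() above, the sketch below shows the value computed for RVV 1.0 versus XTheadVector. It assumes the usual RVV 1.0 vtype layout (vlmul in bits 2:0, vsew in bits 5:3, vta/vma left at 0) and the RVV 0.7.1 layout that XTheadVector inherits (vlmul in bits 1:0, vsew in bits 4:2); pack_vtype() is an illustrative helper, not part of box64.

#include <stdint.h>
#include <stdio.h>

/* Illustrative helper (not box64 code): mirrors the expression used in vector_vsetvli():
 *     vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul;
 * RVV 1.0 vtype:        vma[7] vta[6] vsew[5:3] vlmul[2:0]   (policy bits left at 0 here)
 * XTheadVector (0.7.1): vediv[6:5]    vsew[4:2] vlmul[1:0]   (tail/mask policy is fixed, not encoded)
 */
static uint32_t pack_vtype(uint32_t sew, uint32_t vlmul, int xtheadvector)
{
    return (sew << (3 - !!xtheadvector)) | vlmul;
}

int main(void)
{
    /* SEW=64 (0b011), LMUL=1 (0b000), the common configuration for the SSE paths above */
    printf("rvv1.0 vtype = 0x%02x\n", pack_vtype(0b011, 0b000, 0)); /* 0x18 */
    printf("xthead vtype = 0x%02x\n", pack_vtype(0b011, 0b000, 1)); /* 0x0c */
    return 0;
}

This is also why vector_vsetvli() falls back to ADDI+VSETVLI unconditionally on XTheadVector: vsetivli does not exist there, regardless of whether vl fits in the immediate.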