[RV64_DYNAREC] Added more opcodes for xtheadvector and fixed more issues (#1897)

* [RV64_DYNAREC] Added 1 more opcode for xtheadvector

* Disabled more opcodes that do not apply to xtheadvector

* Added 1 more opcode

* Added 1 more opcode

* fix
Yang Liu 2024-10-03 18:36:04 +08:00 committed by GitHub
parent e2492a1426
commit 1c7ac8c761
3 changed files with 106 additions and 30 deletions


@ -359,6 +359,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSUB_VV(q0, q0, v0, VECTOR_UNMASKED);
break;
case 0x20:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -371,6 +373,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x21:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXBD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -387,6 +391,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v1);
break;
case 0x22:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXBQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -405,6 +411,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x23:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -417,6 +425,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x24:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXWQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -433,6 +443,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v1);
break;
case 0x25:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVSXDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -445,6 +457,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x28:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMULDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@ -452,7 +466,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
d0 = fpu_get_scratch(dyn);
d1 = fpu_get_scratch(dyn);
// make sure the alignments before vnclip...
// make sure the alignments before vnsrl...
v0 = (q0 & 1) ? fpu_get_scratch_lmul(dyn, VECTOR_LMUL2) : q0;
v1 = (q1 & 1) ? fpu_get_scratch_lmul(dyn, VECTOR_LMUL2) : q1;
if (v0 != q0) VMV_V_V(v0, q0);
@ -465,6 +479,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
if (v0 != q0) VMV_V_V(q0, v0);
break;
case 0x2B:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PACKUSDW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
@ -484,6 +500,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSLIDEUP_VI(q0, v0, 4, VECTOR_UNMASKED);
break;
case 0x30:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -496,6 +514,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x31:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXBD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -512,6 +532,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v1);
break;
case 0x32:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXBQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -530,6 +552,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x33:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -542,6 +566,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v0);
break;
case 0x34:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXWQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -558,6 +584,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VMV_V_V(q0, v1);
break;
case 0x35:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
INST_NAME("PMOVZXDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@ -1096,24 +1124,53 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0x70:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // lack of vrgatherei16.vv
INST_NAME("PSHUFD Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(q0, 1, VECTOR_SEW64);
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
GETGX_empty_vector(q0);
v0 = fpu_get_scratch(dyn);
tmp64u0 = F8;
tmp64u0 = ((tmp64u0 >> 6) << 48) | (((tmp64u0 >> 4) & 3) << 32) | (((tmp64u0 >> 2) & 3) << 16) | (tmp64u0 & 3);
VECTOR_SPLAT_IMM(v0, tmp64u0, x4);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (q0 == q1) {
if (rv64_xtheadvector) { // lack of vrgatherei16.vv
v1 = fpu_get_scratch(dyn);
VRGATHEREI16_VV(v1, q1, v0, VECTOR_UNMASKED);
VMV_V_V(q0, v1);
if (tmp64u0 == 0) {
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
} else {
tmp64u1 = (((tmp64u0 >> 6) & 3) << 32) | ((tmp64u0 >> 4) & 3);
tmp64u0 = (((tmp64u0 >> 2) & 3) << 32) | (tmp64u0 & 3);
if (tmp64u1 == 0) {
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
} else {
MOV64x(x4, tmp64u1);
VMV_S_X(v1, x4); // clears upper bits!
}
if (tmp64u0 == tmp64u1) {
VSLIDE1UP_VX(v0, v1, x4, VECTOR_UNMASKED);
} else if (tmp64u0 == 0) {
VSLIDE1UP_VX(v0, v1, xZR, VECTOR_UNMASKED);
} else {
MOV64x(x4, tmp64u0);
VSLIDE1UP_VX(v0, v1, x4, VECTOR_UNMASKED);
}
}
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (q0 == q1) {
VRGATHER_VV(v1, q1, v0, VECTOR_UNMASKED);
VMV_V_V(q0, v1);
} else {
VRGATHER_VV(q0, q1, v0, VECTOR_UNMASKED);
}
} else {
VRGATHEREI16_VV(q0, q1, v0, VECTOR_UNMASKED);
tmp64u0 = ((tmp64u0 >> 6) << 48) | (((tmp64u0 >> 4) & 3) << 32) | (((tmp64u0 >> 2) & 3) << 16) | (tmp64u0 & 3);
VECTOR_SPLAT_IMM(v0, tmp64u0, x4);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (q0 == q1) {
v1 = fpu_get_scratch(dyn);
VRGATHEREI16_VV(v1, q1, v0, VECTOR_UNMASKED);
VMV_V_V(q0, v1);
} else {
VRGATHEREI16_VV(q0, q1, v0, VECTOR_UNMASKED);
}
}
break;
case 0x71:
@ -1441,19 +1498,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
break;
case 0xD7:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: VMASK conversion
INST_NAME("PMOVMSKB Gd, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGD;
GETEX_vector(q0, 0, 0, VECTOR_SEW8);
v0 = fpu_get_scratch(dyn);
v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
if (rv64_xtheadvector) {
// Force VMASK element size to 1bit
vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
}
VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
VMV_X_S(gd, VMASK);
ZEXTH(gd, gd);
if (!rv64_xtheadvector) { ZEXTH(gd, gd); }
break;
case 0xD8:
case 0xD9:
@ -1708,8 +1767,6 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSLL_VX(q0, q0, x4, VECTOR_UNMASKED);
break;
case 0xF5:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // lack of vrgatherei16.vv
INST_NAME("PMADDWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
@ -1717,18 +1774,36 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // warning, no more scratches!
ADDI(x4, xZR, 6);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
d1 = fpu_get_scratch(dyn); // use this with caution!
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
if (rv64_xtheadvector) { // lack of vrgatherei16.vv
ADDI(x4, xZR, 6);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
// out of scratches, go back to lmul1 and vmv....
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
VMV_V_V(d1, v1);
VADD_VI(d0, d0, 1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d1, VECTOR_UNMASKED);
} else {
ADDI(x4, xZR, 6);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
}
break;
case 0xF6:
INST_NAME("PSADBW Gx, Ex");


@ -2609,7 +2609,7 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f
uint32_t vl = (int)((float)(16 >> sew) * multiple);
uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul;
if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) {
if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl || dyn->inst_vlmul != vlmul) {
if (vl <= 31 && !rv64_xtheadvector) {
VSETIVLI(xZR, vl, vtypei);
} else {


@ -1630,6 +1630,7 @@ f28-31 ft8-11 FP temporaries Caller
#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
// Warning: zero-extended on xtheadvector!
#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
// Vector Integer Extension Instructions
@ -1659,7 +1660,7 @@ f28-31 ft8-11 FP temporaries Caller
#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
#define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
#define VID_V(vd, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
#define VDIVU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111
#define VDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111