mirror of
https://github.com/ptitSeb/box64.git
synced 2024-10-06 19:33:37 +00:00
[RV64_DYNAREC] Added more opcodes for xtheadvector and fixed more issues (#1897)
* [RV64_DYNAREC] Added 1 more opcode for xtheadvector * Disabled more opcodes that do not apply to xtheadvector * Added 1 more opcode * Added 1 more opcode * fix
This commit is contained in:
parent
e2492a1426
commit
1c7ac8c761
@ -359,6 +359,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VSUB_VV(q0, q0, v0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x20:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXBW Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -371,6 +373,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x21:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXBD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -387,6 +391,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v1);
|
||||
break;
|
||||
case 0x22:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXBQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -405,6 +411,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x23:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXWD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -417,6 +425,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x24:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXWQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -433,6 +443,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v1);
|
||||
break;
|
||||
case 0x25:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVSXDQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -445,6 +457,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x28:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMULDQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
|
||||
@ -452,7 +466,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
d1 = fpu_get_scratch(dyn);
|
||||
// make sure the alignments before vnclip...
|
||||
// make sure the alignments before vnsrl...
|
||||
v0 = (q0 & 1) ? fpu_get_scratch_lmul(dyn, VECTOR_LMUL2) : q0;
|
||||
v1 = (q1 & 1) ? fpu_get_scratch_lmul(dyn, VECTOR_LMUL2) : q1;
|
||||
if (v0 != q0) VMV_V_V(v0, q0);
|
||||
@ -465,6 +479,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
if (v0 != q0) VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x2B:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PACKUSDW Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
@ -484,6 +500,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VSLIDEUP_VI(q0, v0, 4, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x30:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXBW Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -496,6 +514,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x31:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXBD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -512,6 +532,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v1);
|
||||
break;
|
||||
case 0x32:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXBQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -530,6 +552,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x33:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXWD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -542,6 +566,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v0);
|
||||
break;
|
||||
case 0x34:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXWQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -558,6 +584,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VMV_V_V(q0, v1);
|
||||
break;
|
||||
case 0x35:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: lack of tail undisturbed
|
||||
|
||||
INST_NAME("PMOVZXDQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
@ -1096,24 +1124,53 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
}
|
||||
break;
|
||||
case 0x70:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // lack of vrgatherei16.vv
|
||||
|
||||
INST_NAME("PSHUFD Gx, Ex, Ib");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
|
||||
GETGX_vector(q0, 1, VECTOR_SEW64);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
|
||||
GETGX_empty_vector(q0);
|
||||
v0 = fpu_get_scratch(dyn);
|
||||
tmp64u0 = F8;
|
||||
tmp64u0 = ((tmp64u0 >> 6) << 48) | (((tmp64u0 >> 4) & 3) << 32) | (((tmp64u0 >> 2) & 3) << 16) | (tmp64u0 & 3);
|
||||
VECTOR_SPLAT_IMM(v0, tmp64u0, x4);
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
if (q0 == q1) {
|
||||
if (rv64_xtheadvector) { // lack of vrgatherei16.vv
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
VRGATHEREI16_VV(v1, q1, v0, VECTOR_UNMASKED);
|
||||
VMV_V_V(q0, v1);
|
||||
if (tmp64u0 == 0) {
|
||||
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
|
||||
} else {
|
||||
tmp64u1 = (((tmp64u0 >> 6) & 3) << 32) | ((tmp64u0 >> 4) & 3);
|
||||
tmp64u0 = (((tmp64u0 >> 2) & 3) << 32) | (tmp64u0 & 3);
|
||||
if (tmp64u1 == 0) {
|
||||
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
|
||||
} else {
|
||||
MOV64x(x4, tmp64u1);
|
||||
VMV_S_X(v1, x4); // clears upper bits!
|
||||
}
|
||||
if (tmp64u0 == tmp64u1) {
|
||||
VSLIDE1UP_VX(v0, v1, x4, VECTOR_UNMASKED);
|
||||
} else if (tmp64u0 == 0) {
|
||||
VSLIDE1UP_VX(v0, v1, xZR, VECTOR_UNMASKED);
|
||||
} else {
|
||||
MOV64x(x4, tmp64u0);
|
||||
VSLIDE1UP_VX(v0, v1, x4, VECTOR_UNMASKED);
|
||||
}
|
||||
}
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
if (q0 == q1) {
|
||||
VRGATHER_VV(v1, q1, v0, VECTOR_UNMASKED);
|
||||
VMV_V_V(q0, v1);
|
||||
} else {
|
||||
VRGATHER_VV(q0, q1, v0, VECTOR_UNMASKED);
|
||||
}
|
||||
} else {
|
||||
VRGATHEREI16_VV(q0, q1, v0, VECTOR_UNMASKED);
|
||||
tmp64u0 = ((tmp64u0 >> 6) << 48) | (((tmp64u0 >> 4) & 3) << 32) | (((tmp64u0 >> 2) & 3) << 16) | (tmp64u0 & 3);
|
||||
VECTOR_SPLAT_IMM(v0, tmp64u0, x4);
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
if (q0 == q1) {
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
VRGATHEREI16_VV(v1, q1, v0, VECTOR_UNMASKED);
|
||||
VMV_V_V(q0, v1);
|
||||
} else {
|
||||
VRGATHEREI16_VV(q0, q1, v0, VECTOR_UNMASKED);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 0x71:
|
||||
@ -1441,19 +1498,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
}
|
||||
break;
|
||||
case 0xD7:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // TODO: VMASK convertion
|
||||
|
||||
INST_NAME("PMOVMSKB Gd, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
GETGD;
|
||||
GETEX_vector(q0, 0, 0, VECTOR_SEW8);
|
||||
v0 = fpu_get_scratch(dyn);
|
||||
v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
|
||||
VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
|
||||
if (rv64_xtheadvector) {
|
||||
// Force VMASK element size to 1bit
|
||||
vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
|
||||
}
|
||||
VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
VMV_X_S(gd, VMASK);
|
||||
ZEXTH(gd, gd);
|
||||
if (!rv64_xtheadvector) { ZEXTH(gd, gd); }
|
||||
break;
|
||||
case 0xD8:
|
||||
case 0xD9:
|
||||
@ -1708,8 +1767,6 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
VSLL_VX(q0, q0, x4, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0xF5:
|
||||
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // lack of vrgatherei16.vv
|
||||
|
||||
INST_NAME("PMADDWD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
@ -1717,18 +1774,36 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
|
||||
v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
|
||||
v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
|
||||
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
|
||||
d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // warning, no more scratches!
|
||||
ADDI(x4, xZR, 6);
|
||||
VID_V(d0, VECTOR_UNMASKED);
|
||||
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
|
||||
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
|
||||
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
|
||||
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
|
||||
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
|
||||
d1 = fpu_get_scratch(dyn); // use this at caution!
|
||||
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
|
||||
if (rv64_xtheadvector) { // lack of vrgatherei16.vv
|
||||
ADDI(x4, xZR, 6);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
|
||||
VID_V(d0, VECTOR_UNMASKED);
|
||||
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
|
||||
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
|
||||
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
|
||||
// out of scratches, go back to lmul1 and vmv....
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
|
||||
VMV_V_V(d1, v1);
|
||||
VADD_VI(d0, d0, 1, VECTOR_UNMASKED);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
|
||||
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 7 5 3 1
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
VADD_VV(q0, v1, d1, VECTOR_UNMASKED);
|
||||
} else {
|
||||
ADDI(x4, xZR, 6);
|
||||
VID_V(d0, VECTOR_UNMASKED);
|
||||
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
|
||||
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
|
||||
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
|
||||
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
|
||||
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
|
||||
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
|
||||
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
|
||||
}
|
||||
break;
|
||||
case 0xF6:
|
||||
INST_NAME("PSADBW Gx, Ex");
|
||||
|
@ -2609,7 +2609,7 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f
|
||||
|
||||
uint32_t vl = (int)((float)(16 >> sew) * multiple);
|
||||
uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul;
|
||||
if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) {
|
||||
if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl || dyn->inst_vlmul != vlmul) {
|
||||
if (vl <= 31 && !rv64_xtheadvector) {
|
||||
VSETIVLI(xZR, vl, vtypei);
|
||||
} else {
|
||||
|
@ -1630,6 +1630,7 @@ f28–31 ft8–11 FP temporaries Caller
|
||||
#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111
|
||||
#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111
|
||||
|
||||
// Warning: zero-extended on xtheadvector!
|
||||
#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
|
||||
|
||||
// Vector Integer Extension Instructions
|
||||
@ -1659,7 +1660,7 @@ f28–31 ft8–11 FP temporaries Caller
|
||||
#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111
|
||||
#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111
|
||||
|
||||
#define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
|
||||
#define VID_V(vd, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
|
||||
|
||||
#define VDIVU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111
|
||||
#define VDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111
|
||||
|
Loading…
Reference in New Issue
Block a user