Mirror of https://github.com/ptitSeb/box64.git (synced 2024-10-07 11:53:58 +00:00)
[RV64_DYNAREC] Added more 66 0F opcodes for vector (#1815)
* [RV64_DYNAREC] Added more 66 0F opcodes for vector
* [RV64_DYNAREC] Fixed PACKUSWB for vlen >= 256
This commit is contained in:
parent 629346b6a7
commit fc9900c8f6
@ -36,6 +36,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
int q0, q1;
|
||||
int d0, d1, d2;
|
||||
int64_t fixedaddress, gdoffset;
|
||||
uint32_t vtypei;
|
||||
int unscaled;
|
||||
MAYUSE(d0);
|
||||
MAYUSE(d1);
|
||||
@ -123,6 +124,60 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
DEFAULT_VECTOR;
|
||||
}
|
||||
break;
|
||||
case 0x61:
|
||||
INST_NAME("PUNPCKLWD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
ADDI(x1, xZR, 0b10101010);
|
||||
VMV_V_X(VMASK, x1); // VMASK = 0b10101010
|
||||
v0 = fpu_get_scratch(dyn);
|
||||
VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 3 3 2 2 1 1 0 0
|
||||
GETGX_vector(q0, 1, VECTOR_SEW16);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
d1 = fpu_get_scratch(dyn);
|
||||
VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED);
|
||||
VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED);
|
||||
VMERGE_VVM(q0, d1, d0);
|
||||
break;
|
||||
case 0x67:
|
||||
INST_NAME("PACKUSWB Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
GETGX_vector(q0, 1, VECTOR_SEW16);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
|
||||
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
d1 = fpu_get_scratch(dyn);
|
||||
VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED);
|
||||
VMAX_VX(d1, xZR, q1, VECTOR_UNMASKED);
|
||||
if (rv64_vlen >= 256) {
|
||||
/* mu tu sew lmul=1 */
|
||||
vtypei = (0b0 << 7) | (0b0 << 6) | (VECTOR_SEW16 << 3) | 0b000;
|
||||
ADDI(x1, xZR, 16); // double the vl for slideup.
|
||||
VSETVLI(xZR, x1, vtypei);
|
||||
VSLIDEUP_VI(d0, 8, d1, VECTOR_UNMASKED); // splice d0 and d1 here!
|
||||
}
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
|
||||
VNCLIPU_WI(q0, 0, d0, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0x69:
|
||||
INST_NAME("PUNPCKHWD Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
|
||||
ADDI(x1, xZR, 0b10101010);
|
||||
VMV_V_X(VMASK, x1); // VMASK = 0b10101010
|
||||
v0 = fpu_get_scratch(dyn);
|
||||
VIOTA_M(v0, VMASK, VECTOR_UNMASKED);
|
||||
VADD_VI(v0, 4, v0, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4
|
||||
GETGX_vector(q0, 1, VECTOR_SEW16);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
d1 = fpu_get_scratch(dyn);
|
||||
VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED);
|
||||
VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED);
|
||||
VMERGE_VVM(q0, d1, d0);
|
||||
break;
|
||||
case 0x6C:
|
||||
INST_NAME("PUNPCKLQDQ Gx, Ex");
|
||||
nextop = F8;
|
||||
@ -190,18 +245,26 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
|
||||
} else {
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
|
||||
q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
|
||||
GETEX_vector(q1, 0, 0, dyn->vector_eew);
|
||||
VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
|
||||
}
|
||||
break;
|
||||
case 0xD4:
|
||||
INST_NAME("PADDQ Gx,Ex");
|
||||
INST_NAME("PADDQ Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
|
||||
GETGX_vector(q0, 1, VECTOR_SEW64);
|
||||
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
|
||||
VADD_VV(q0, q0, q1, VECTOR_UNMASKED);
|
||||
break;
|
||||
case 0xDB:
|
||||
INST_NAME("PAND Gx, Ex");
|
||||
nextop = F8;
|
||||
SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
|
||||
GETGX_vector(q0, 1, dyn->vector_eew);
|
||||
GETEX_vector(q1, 0, 0, dyn->vector_eew);
|
||||
VAND_VV(q0, q0, q1, VECTOR_UNMASKED);
|
||||
break;
|
||||
default:
|
||||
DEFAULT_VECTOR;
|
||||
}
|
||||
|
@ -2601,7 +2601,7 @@ int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew)
|
||||
* sew: selected element width
|
||||
* lmul: vector register group multiplier
|
||||
*
|
||||
* mu tu sew lmul=1 */
|
||||
* mu tu sew lmul=1 */
|
||||
uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | 0b000;
|
||||
ADDI(s1, xZR, 16 >> sew);
|
||||
VSETVLI(xZR, s1, vtypei);
|
||||
|
Loading…
Reference in New Issue
Block a user