From 979a3232f9d23c34b6c8c86fb3913fb34b3da333 Mon Sep 17 00:00:00 2001 From: xctan Date: Fri, 15 Nov 2024 00:41:28 +0800 Subject: [PATCH] [RV64_DYNAREC] Added more MMX opcodes for vector (#2035) * [RV64_DYNAREC] Added 0F 68 PUNPCKHBW opcode * [RV64_DYNAREC] Added 0F 69 PUNPCKHWD opcode * [RV64_DYNAREC] Added 0F 6A PUNPCKHDQ opcode * [RV64_DYNAREC] Updated 0F 68-69 PUNPCKHBW/WD opcodes * [RV64_DYNAREC] Added 0F 60 PUNPCKLBW opcode * [RV64_DYNAREC] Added 0F 61 PUNPCKLWD opcode * [RV64_DYNAREC] Added 0F 62 PUNPCKLDQ opcode * [RV64_DYNAREC] Simplified MMX PUNPCK{L,H}{BW,WD,DQ} --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 101 ++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index f5c3a4d8..195d58f7 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -483,6 +483,52 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VADD_VX(q0, q1, xZR, VECTOR_MASKED); } break; + case 0x60: + INST_NAME("PUNPCKLBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x61: + INST_NAME("PUNPCKLWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + MOV32w(x2, 32); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); nextop = F8; @@ -515,6 +561,61 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED); break; + case 0x68: + INST_NAME("PUNPCKHBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 4, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 4, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x69: + INST_NAME("PUNPCKHWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 2, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 2, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x6A: + INST_NAME("PUNPCKHDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + MOV32w(x2, 32); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 1, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 1, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; case 0x6B: INST_NAME("PACKSSDW Gm, Em"); nextop = F8;