From 4d260217054f16e4efc31c9bef7974898f43252e Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Fri, 5 Apr 2024 22:34:59 +0800 Subject: [PATCH] [LA64_DYNAREC] Added more opcodes (#1416) * [LA64_DYNAREC] Added 0F AF IMUL opcode * Update clang-format rules * Added 08 OR opcode * Added F7 /3 NEG opcode and fixed some potential bugs --- .clang-format | 2 +- src/dynarec/la64/dynarec_la64_00.c | 16 ++++ src/dynarec/la64/dynarec_la64_0f.c | 67 +++++++++++++++++ src/dynarec/la64/dynarec_la64_emit_logic.c | 39 +++++++++- src/dynarec/la64/dynarec_la64_emit_math.c | 86 +++++++++++++++++++++- src/dynarec/la64/dynarec_la64_helper.h | 17 +++++ src/dynarec/la64/la64_emitter.h | 72 ++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_0f.c | 1 + 8 files changed, 296 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index 95411de6..826406e5 100644 --- a/.clang-format +++ b/.clang-format @@ -12,6 +12,6 @@ MaxEmptyLinesToKeep: 2 IndentCaseLabels: true AlignConsecutiveMacros: true WhitespaceSensitiveMacros: ['QUOTE'] -IfMacros: ['IFX', 'IFX2', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF'] +IfMacros: ['IFX', 'IFX2', 'IFXA', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF'] UseTab: Never --- diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index fe7b042d..56765fe1 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -102,6 +102,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6); break; + case 0x08: + INST_NAME("OR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_or8(dyn, ninst, x1, x2, x4, x5); + EBBACK(); + break; case 0x09: INST_NAME("OR Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -1128,6 +1137,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ZEROUP(ed); WBACK; break; + case 3: + 
INST_NAME("NEG Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED(0); + emit_neg32(dyn, ninst, rex, ed, x3, x4); + WBACK; + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index cbd5f0e1..f4160c2f 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -261,6 +261,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX])); LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX])); break; + case 0xAF: + INST_NAME("IMUL Gd, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(0); + if (box64_dynarec_test) { + // avoid noise during test + CLEAR_FLAGS(x3); + } + if (rex.w) { + // 64bits imul: x3 = high 64 bits of the signed 128-bit product; DF is written through x4 so x3 stays valid for CF/OF + UFLAG_IF { + MULH_D(x3, gd, ed); + MUL_D(gd, gd, ed); + IFX (X_PEND) { + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x4, d_imul64); + } else { + SET_DFNONE(); + } + IFX (X_CF | X_OF) { + SRAI_D(x4, gd, 63); + XOR(x3, x3, x4); + SNEZ(x3, x3); + IFX (X_CF) { + BSTRINS_D(xFlags, x3, F_CF, F_CF); + } + IFX (X_OF) { + BSTRINS_D(xFlags, x3, F_OF, F_OF); + } + } + } else { + MULxw(gd, gd, ed); + } + } else { + // 32bits imul: signed 32x32->64 product of bits [31:0] only; x3 = sign-extended high half, compared below with the sign of the low half + UFLAG_IF { + MULW_D_W(gd, gd, ed); + SRAI_D(x3, gd, 32); + SLLI_W(gd, gd, 0); + IFX (X_PEND) { + UFLAG_RES(gd); + UFLAG_OP1(x3); + UFLAG_DF(x4, d_imul32); + } else IFX (X_CF | X_OF) { + SET_DFNONE(); + } + IFX (X_CF | X_OF) { + SRAI_W(x4, gd, 31); + SUB_D(x3, x3, x4); + SNEZ(x3, x3); + IFX (X_CF) { + BSTRINS_D(xFlags, x3, F_CF, F_CF); + } + IFX (X_OF) { + BSTRINS_D(xFlags, x3, F_OF, F_OF); + } + } + } else { + MULxw(gd, gd, ed); + } + SLLI_D(gd, gd, 32); + SRLI_D(gd, gd, 32); + } + break; case 0xB6: INST_NAME("MOVZX Gd, Eb"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index 2aa4315d..34315330 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -245,7 +245,7 @@ void 
emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 SET_DFNONE(); } - IFXA(X_ALL, la64_lbt) { + IFXA (X_ALL, la64_lbt) { if (rex.w) X64_OR_D(s1, s2); else @@ -326,3 +326,40 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in emit_pf(dyn, ninst, s1, s3, s4); } } + + +// emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) +void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + IFX (X_PEND) { + SET_DF(s3, d_or8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + IFXA (X_ALL, la64_lbt) { + X64_OR_B(s1, s2); + } + + OR(s1, s1, s2); + + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) return; + + CLEAR_FLAGS(s3); + IFX (X_SF) { + SRLI_D(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index 600c8c49..f47eb082 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -39,7 +39,10 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s if (la64_lbt) { IFX(X_ALL) { - X64_ADD_WU(s1, s2); + if (rex.w) + X64_ADD_DU(s1, s2); + else + X64_ADD_WU(s1, s2); } ADDxw(s1, s1, s2); if (!rex.w) ZEROUP(s1); @@ -465,7 +468,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s if (la64_lbt) { IFX(X_ALL) { - X64_SUB_WU(s1, s2); + if (rex.w) + X64_SUB_DU(s1, s2); + else + X64_SUB_WU(s1, s2); } SUBxw(s1, s1, s2); if (!rex.w) ZEROUP(s1); @@ -577,3 +583,79 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i emit_pf(dyn, ninst, s1, s3, s4); } } + + +// emit NEG32 instruction, from s1, store result in s1 using s2 
and s3 as scratch +void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3) +{ + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, rex.w ? d_neg64 : d_neg32); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (!la64_lbt) { + IFX (X_AF | X_OF) { + MV(s3, s1); // s3 = op1, must stay live until the AF/OF computation below + } + } + + IFXA (X_ALL, la64_lbt) { + if (rex.w) + X64_SUB_DU(xZR, s1); + else + X64_SUB_WU(xZR, s1); + } + + NEGxw(s1, s1); + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + if (!rex.w) { + ZEROUP(s1); + } + return; + } + + CLEAR_FLAGS(s2); // s2 is unused at this point; s3 still holds op1 + IFX (X_CF) { + BEQZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + IFX (X_AF | X_OF) { + OR(s3, s1, s3); // s3 = res | op1 + IFX (X_AF) { + /* af = bc & 0x8 */ + ANDI(s2, s3, 8); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ + SRLI_D(s2, s3, (rex.w ? 64 : 32) - 2); + SRLI_D(s3, s2, 1); + XOR(s2, s2, s3); + ANDI(s2, s2, 1); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + IFX (X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s2); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } +} diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 3405afcb..d3acc05f 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -441,6 +441,19 @@ #ifndef SET_HASCALLRET #define SET_HASCALLRET() #endif +#define UFLAG_OP1(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op1)); } +#define UFLAG_OP2(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op2)); } +#define UFLAG_OP12(A1, A2) \ + if (dyn->insts[ninst].x64.gen_flags) { \ + SDxw(A1, xEmu, offsetof(x64emu_t, op1)); \ + SDxw(A2, xEmu, offsetof(x64emu_t, op2)); \ + } 
+#define UFLAG_RES(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, res)); } +#define UFLAG_DF(r, A) \ + if (dyn->insts[ninst].x64.gen_flags) { SET_DF(r, A) } #define UFLAG_IF if (dyn->insts[ninst].x64.gen_flags) #ifndef DEFAULT #define DEFAULT \ @@ -542,8 +555,10 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_sub32c STEPNAME(emit_sub32c) #define emit_sub8 STEPNAME(emit_sub8) #define emit_sub8c STEPNAME(emit_sub8c) +#define emit_neg32 STEPNAME(emit_neg32) #define emit_or32 STEPNAME(emit_or32) #define emit_or32c STEPNAME(emit_or32c) +#define emit_or8 STEPNAME(emit_or8) #define emit_xor32 STEPNAME(emit_xor32) #define emit_and8 STEPNAME(emit_and8) #define emit_and8c STEPNAME(emit_and8c) @@ -601,8 +616,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5); void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5); +void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3); void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 9e584062..291b28da 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -309,6 +309,76 @@ f24-f31 fs0-fs7 Static registers Callee 
#define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0) +// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0]) +// GR[rd] = SignExtend(product[31:0], GRLEN) +#define MUL_W(rd, rj, rk) EMIT(type_3R(0b00000000000111000, rk, rj, rd)) + +// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0]) +// GR[rd] = SignExtend(product[63:32], GRLEN) +#define MULH_W(rd, rj, rk) EMIT(type_3R(0b00000000000111001, rk, rj, rd)) + +// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0]) +// GR[rd] = SignExtend(product[63:32], GRLEN) +#define MULH_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111010, rk, rj, rd)) + +// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0]) +// GR[rd] = product[63:0] +#define MUL_D(rd, rj, rk) EMIT(type_3R(0b00000000000111011, rk, rj, rd)) + +// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0]) +// GR[rd] = product[127:64] +#define MULH_D(rd, rj, rk) EMIT(type_3R(0b00000000000111100, rk, rj, rd)) + +// product = unsigned(GR[rj][63:0]) * unsigned(GR[rk][63:0]) +// GR[rd] = product[127:64] +#define MULH_DU(rd, rj, rk) EMIT(type_3R(0b00000000000111101, rk, rj, rd)) + +// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0]) +// GR[rd] = product[63:0] +#define MULW_D_W(rd, rj, rk) EMIT(type_3R(0b00000000000111110, rk, rj, rd)) + +// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0]) +// GR[rd] = product[63:0] +#define MULW_D_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111111, rk, rj, rd)) + +// quotient = signed(GR[rj][31:0]) / signed(GR[rk][31:0]) +// GR[rd] = SignExtend(quotient[31:0], GRLEN) +#define DIV_W(rd, rj, rk) EMIT(type_3R(0b00000000001000000, rk, rj, rd)) + +// quotient = unsigned(GR[rj][31:0]) / unsigned(GR[rk][31:0]) +// GR[rd] = SignExtend(quotient[31:0], GRLEN) +#define DIV_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000010, rk, rj, rd)) + +// remainder = signed(GR[rj][31:0]) % signed(GR[rk][31:0]) +// GR[rd] = SignExtend(remainder[31:0], GRLEN) +#define MOD_W(rd, rj, rk) EMIT(type_3R(0b00000000001000001, rk, rj, rd)) + +// remainder = 
unsigned(GR[rj][31:0]) % unsigned(GR[rk][31:0]) +// GR[rd] = SignExtend(remainder[31:0], GRLEN) +#define MOD_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000011, rk, rj, rd)) + +// GR[rd] = signed(GR[rj][63:0]) / signed(GR[rk][63:0]) +#define DIV_D(rd, rj, rk) EMIT(type_3R(0b00000000001000100, rk, rj, rd)) + +// GR[rd] = unsigned(GR[rj][63:0]) / unsigned(GR[rk][63:0]) +#define DIV_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000110, rk, rj, rd)) + +// GR[rd] = signed(GR[rj] [63:0]) % signed(GR[rk] [63:0]) +#define MOD_D(rd, rj, rk) EMIT(type_3R(0b00000000001000101, rk, rj, rd)) + +// GR[rd] = unsigned(GR[rj] [63:0]) % unsigned(GR[rk] [63:0]) +#define MOD_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000111, rk, rj, rd)) + +#define MULxw(rd, rj, rk) \ + do { \ + if (rex.w) { \ + MUL_D(rd, rj, rk); \ + } else { \ + MUL_W(rd, rj, rk); \ + } \ + } while (0) + + // bstr32[31:msbw+1] = GR[rd][31: msbw+1] // bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0] // bstr32[lsbw-1:0] = GR[rd][lsbw-1:0] @@ -1709,6 +1779,8 @@ LSX instruction starts with V, LASX instruction starts with XV. SUB_W(rd, rj, rk); \ } while (0) +#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1) + #define SUBz(rd, rj, rk) \ do { \ if (rex.is32bits) \ diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index e7191fc9..a7c54e58 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1445,6 +1445,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; case 0xAF: + // TODO: Refine this INST_NAME("IMUL Gd, Ed"); SETFLAGS(X_ALL, SF_PENDING); nextop = F8;