[LA64_DYNAREC] Added more opcodes (#1416)

* [LA64_DYNAREC] Added 0F AF IMUL opcode

* Update clang-format rules

* Added 08 OR opcode

* Added F7 /3 NEG opcode and fixed some potential bugs
This commit is contained in:
Yang Liu 2024-04-05 22:34:59 +08:00 committed by GitHub
parent 2e9b8eff59
commit 4d26021705
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 296 additions and 4 deletions

View File

@ -12,6 +12,6 @@ MaxEmptyLinesToKeep: 2
IndentCaseLabels: true
AlignConsecutiveMacros: true
WhitespaceSensitiveMacros: ['QUOTE']
IfMacros: ['IFX', 'IFX2', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
IfMacros: ['IFX', 'IFX2', 'IFXA', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
UseTab: Never
---

View File

@ -102,6 +102,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
i64 = F32S;
emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6);
break;
case 0x08:
INST_NAME("OR Eb, Gb");
SETFLAGS(X_ALL, SF_SET_PENDING);
nextop = F8;
GETEB(x1, 0);
GETGB(x2);
emit_or8(dyn, ninst, x1, x2, x4, x5);
EBBACK();
break;
case 0x09:
INST_NAME("OR Ed, Gd");
SETFLAGS(X_ALL, SF_SET_PENDING);
@ -1128,6 +1137,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
ZEROUP(ed);
WBACK;
break;
case 3:
INST_NAME("NEG Ed");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETED(0);
emit_neg32(dyn, ninst, rex, ed, x3, x4);
WBACK;
break;
default:
DEFAULT;
}

View File

@ -261,6 +261,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
break;
case 0xAF:
INST_NAME("IMUL Gd, Ed");
SETFLAGS(X_ALL, SF_PENDING);
nextop = F8;
GETGD;
GETED(0);
if (box64_dynarec_test) {
// avoid noise during test
CLEAR_FLAGS(x3);
}
if (rex.w) {
// 64bits imul
UFLAG_IF {
MULH_D(x3, gd, ed);
MUL_D(gd, gd, ed);
IFX (X_PEND) {
UFLAG_OP1(x3);
UFLAG_RES(gd);
UFLAG_DF(x3, d_imul64);
} else {
SET_DFNONE();
}
IFX (X_CF | X_OF) {
SRAI_D(x4, gd, 63);
XOR(x3, x3, x4);
SNEZ(x3, x3);
IFX (X_CF) {
BSTRINS_D(xFlags, x3, F_CF, F_CF);
}
IFX (X_OF) {
BSTRINS_D(xFlags, x3, F_OF, F_OF);
}
}
} else {
MULxw(gd, gd, ed);
}
} else {
// 32bits imul
UFLAG_IF {
MUL_D(gd, gd, ed);
SRLI_D(x3, gd, 32);
SLLI_W(gd, gd, 0);
IFX (X_PEND) {
UFLAG_RES(gd);
UFLAG_OP1(x3);
UFLAG_DF(x4, d_imul32);
} else IFX (X_CF | X_OF) {
SET_DFNONE();
}
IFX (X_CF | X_OF) {
SRAI_W(x4, gd, 31);
SUB_D(x3, x3, x4);
SNEZ(x3, x3);
IFX (X_CF) {
BSTRINS_D(xFlags, x3, F_CF, F_CF);
}
IFX (X_OF) {
BSTRINS_D(xFlags, x3, F_OF, F_OF);
}
}
} else {
MULxw(gd, gd, ed);
}
SLLI_D(gd, gd, 32);
SRLI_D(gd, gd, 32);
}
break;
case 0xB6:
INST_NAME("MOVZX Gd, Eb");
nextop = F8;

View File

@ -326,3 +326,40 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
emit_pf(dyn, ninst, s1, s3, s4);
}
}
// emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
// NOTE: the SF computation below assumes s1 holds a zero-extended 8-bit value
// (as produced by GETEB/GETGB) — bits above 7 must be clear.
void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    // Deferred-flags bookkeeping: either record the d_or8 op so flags can be
    // recomputed later from op1/op2/res, or mark them as not pending.
    IFX (X_PEND) {
        SET_DF(s3, d_or8);
    } else IFX (X_ALL) {
        SET_DFNONE();
    }
    // LBT fast path: X64_OR_B updates the emulated x86 flags in hardware for
    // an 8-bit OR, so the manual flag emulation below can be skipped.
    IFXA (X_ALL, la64_lbt) {
        X64_OR_B(s1, s2);
    }
    OR(s1, s1, s2);
    IFX (X_PEND) {
        // Save the low byte of the result for deferred flag evaluation.
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) return; // flags already produced by X64_OR_B above
    // Manual flag path: x86 OR clears CF/OF/AF, so start from a clean slate
    // and only set SF/ZF/PF as needed.
    CLEAR_FLAGS(s3);
    IFX (X_SF) {
        // SF = bit 7 of the 8-bit result.
        SRLI_D(s3, s1, 7);
        BEQZ(s3, 8); // branch over the next 4-byte instruction when bit 7 is clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX (X_ZF) {
        BNEZ(s1, 8); // result non-zero: skip setting ZF
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX (X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

View File

@ -39,6 +39,9 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
if (la64_lbt) {
IFX(X_ALL) {
if (rex.w)
X64_ADD_DU(s1, s2);
else
X64_ADD_WU(s1, s2);
}
ADDxw(s1, s1, s2);
@ -465,6 +468,9 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
if (la64_lbt) {
IFX(X_ALL) {
if (rex.w)
X64_SUB_DU(s1, s2);
else
X64_SUB_WU(s1, s2);
}
SUBxw(s1, s1, s2);
@ -577,3 +583,79 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
emit_pf(dyn, ninst, s1, s3, s4);
}
}
// emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
// Handles both 32-bit (rex.w == 0, result zero-extended to 64 bits) and
// 64-bit (rex.w == 1) NEG, i.e. res = 0 - op1, with x86 flag emulation.
void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
{
    // Deferred-flags bookkeeping: stash the operand and the op kind so flags
    // can be recomputed later, or mark flags as not pending.
    IFX (X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w ? d_neg64 : d_neg32);
    } else IFX (X_ALL) {
        SET_DFNONE();
    }
    if (!la64_lbt) {
        // Keep a copy of the operand: the manual AF/OF computation below
        // needs op1 after s1 has been overwritten with the result.
        IFX (X_AF | X_OF) {
            MV(s3, s1); // s3 = op1
        }
    }
    // LBT fast path: NEG is 0 - src, so let the hardware compute the x86
    // flags for the subtraction xZR - s1.
    IFXA (X_ALL, la64_lbt) {
        if (rex.w)
            X64_SUB_DU(xZR, s1);
        else
            X64_SUB_WU(xZR, s1);
    }
    NEGxw(s1, s1);
    IFX (X_PEND) {
        // Save the result for deferred flag evaluation.
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        // Flags already produced by X64_SUB_*U; just normalize the 32-bit
        // result to zero-extended form as x86 semantics require.
        if (!rex.w) {
            ZEROUP(s1);
        }
        return;
    }
    // Manual flag path.
    CLEAR_FLAGS(s3);
    IFX (X_CF) {
        // x86 NEG sets CF unless the operand is 0; the result is non-zero
        // exactly when the operand was non-zero.
        BEQZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX (X_AF | X_OF) {
        // Borrow chain for 0 - op1: bc = res | op1.
        OR(s3, s1, s3); // s3 = res | op1
        IFX (X_AF) {
            /* af = bc & 0x8 */
            ANDI(s2, s3, 8);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX (X_OF) {
            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
            SRLI_D(s2, s3, (rex.w ? 64 : 32) - 2);
            SRLI_D(s3, s2, 1);
            XOR(s2, s2, s3);
            ANDI(s2, s2, 1);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    // SF must be tested before ZEROUP: for 32-bit, s1 is still sign-extended
    // here, so a signed 64-bit compare against zero gives the right sign bit.
    IFX (X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX (X_PF) {
        // s3 was clobbered by the AF/OF block above; both s3 and s2 are
        // pure scratch for the parity helper.
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX (X_ZF) {
        BNEZ(s1, 8); // result non-zero: skip setting ZF
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

View File

@ -441,6 +441,19 @@
#ifndef SET_HASCALLRET
#define SET_HASCALLRET()
#endif
#define UFLAG_OP1(A) \
if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op1)); }
#define UFLAG_OP2(A) \
if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op2)); }
#define UFLAG_OP12(A1, A2) \
if (dyn->insts[ninst].x64.gen_flags) { \
SDxw(A1, xEmu, offsetof(x64emu_t, op1)); \
SDxw(A2, xEmu, offsetof(x64emu_t, op2)); \
}
#define UFLAG_RES(A) \
if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, res)); }
#define UFLAG_DF(r, A) \
if (dyn->insts[ninst].x64.gen_flags) { SET_DF(r, A) }
#define UFLAG_IF if (dyn->insts[ninst].x64.gen_flags)
#ifndef DEFAULT
#define DEFAULT \
@ -542,8 +555,10 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
#define emit_sub32c STEPNAME(emit_sub32c)
#define emit_sub8 STEPNAME(emit_sub8)
#define emit_sub8c STEPNAME(emit_sub8c)
#define emit_neg32 STEPNAME(emit_neg32)
#define emit_or32 STEPNAME(emit_or32)
#define emit_or32c STEPNAME(emit_or32c)
#define emit_or8 STEPNAME(emit_or8)
#define emit_xor32 STEPNAME(emit_xor32)
#define emit_and8 STEPNAME(emit_and8)
#define emit_and8c STEPNAME(emit_and8c)
@ -601,8 +616,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);

View File

@ -309,6 +309,76 @@ f24-f31 fs0-fs7 Static registers Callee
#define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0)
// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = SignExtend(product[31:0], GRLEN)
#define MUL_W(rd, rj, rk) EMIT(type_3R(0b00000000000111000, rk, rj, rd))
// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = SignExtend(product[63:32], GRLEN)
#define MULH_W(rd, rj, rk) EMIT(type_3R(0b00000000000111001, rk, rj, rd))
// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(product[63:32], GRLEN)
#define MULH_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111010, rk, rj, rd))
// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
// GR[rd] = product[63:0]
#define MUL_D(rd, rj, rk) EMIT(type_3R(0b00000000000111011, rk, rj, rd))
// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
// GR[rd] = product[127:64]
#define MULH_D(rd, rj, rk) EMIT(type_3R(0b00000000000111100, rk, rj, rd))
// product = unsigned(GR[rj][63:0]) * unsigned(GR[rk][63:0])
// GR[rd] = product[127:64]
#define MULH_DU(rd, rj, rk) EMIT(type_3R(0b00000000000111101, rk, rj, rd))
// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = product[63:0]
#define MULW_D_W(rd, rj, rk) EMIT(type_3R(0b00000000000111110, rk, rj, rd))
// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
// GR[rd] = product[63:0]
#define MULW_D_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111111, rk, rj, rd))
// quotient = signed(GR[rj][31:0]) / signed(GR[rk][31:0])
// GR[rd] = SignExtend(quotient[31:0], GRLEN)
#define DIV_W(rd, rj, rk) EMIT(type_3R(0b00000000001000000, rk, rj, rd))
// quotient = unsigned(GR[rj][31:0]) / unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(quotient[31:0], GRLEN)
#define DIV_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000010, rk, rj, rd))
// remainder = signed(GR[rj][31:0]) % signed(GR[rk][31:0])
// GR[rd] = SignExtend(remainder[31:0], GRLEN)
#define MOD_W(rd, rj, rk) EMIT(type_3R(0b00000000001000001, rk, rj, rd))
// remainder = unsigned(GR[rj][31:0]) % unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(remainder[31:0], GRLEN)
#define MOD_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000011, rk, rj, rd))
// GR[rd] = signed(GR[rj][63:0]) / signed(GR[rk][63:0])
#define DIV_D(rd, rj, rk) EMIT(type_3R(0b00000000001000100, rk, rj, rd))
// GR[rd] = unsigned(GR[rj][63:0]) / unsigned(GR[rk][63:0])
#define DIV_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000110, rk, rj, rd))
// GR[rd] = signed(GR[rj] [63:0]) % signed(GR[rk] [63:0])
#define MOD_D(rd, rj, rk) EMIT(type_3R(0b00000000001000101, rk, rj, rd))
// GR[rd] = unsigned(GR[rj] [63:0]) % unsigned(GR[rk] [63:0])
#define MOD_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000111, rk, rj, rd))
#define MULxw(rd, rj, rk) \
do { \
if (rex.w) { \
MUL_D(rd, rj, rk); \
} else { \
MUL_W(rd, rj, rk); \
} \
} while (0)
// bstr32[31:msbw+1] = GR[rd][31: msbw+1]
// bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0]
// bstr32[lsbw-1:0] = GR[rd][lsbw-1:0]
@ -1709,6 +1779,8 @@ LSX instruction starts with V, LASX instruction starts with XV.
SUB_W(rd, rj, rk); \
} while (0)
#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1)
#define SUBz(rd, rj, rk) \
do { \
if (rex.is32bits) \

View File

@ -1445,6 +1445,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
}
break;
case 0xAF:
// TODO: Refine this
INST_NAME("IMUL Gd, Ed");
SETFLAGS(X_ALL, SF_PENDING);
nextop = F8;