mirror of
https://github.com/ptitSeb/box64.git
synced 2024-11-23 06:30:22 +00:00
[LA64_DYNAREC] Added more opcodes (#1416)
* [LA64_DYNAREC] Added 0F AF IMUL opcode * Update clang-format rules * Added 08 OR opcode * Added F7 /3 NEG opcode and fixed some potential bugs
This commit is contained in:
parent
2e9b8eff59
commit
4d26021705
@ -12,6 +12,6 @@ MaxEmptyLinesToKeep: 2
|
||||
IndentCaseLabels: true
|
||||
AlignConsecutiveMacros: true
|
||||
WhitespaceSensitiveMacros: ['QUOTE']
|
||||
IfMacros: ['IFX', 'IFX2', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
|
||||
IfMacros: ['IFX', 'IFX2', 'IFXA', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
|
||||
UseTab: Never
|
||||
---
|
||||
|
@ -102,6 +102,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
i64 = F32S;
|
||||
emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6);
|
||||
break;
|
||||
case 0x08:
|
||||
INST_NAME("OR Eb, Gb");
|
||||
SETFLAGS(X_ALL, SF_SET_PENDING);
|
||||
nextop = F8;
|
||||
GETEB(x1, 0);
|
||||
GETGB(x2);
|
||||
emit_or8(dyn, ninst, x1, x2, x4, x5);
|
||||
EBBACK();
|
||||
break;
|
||||
case 0x09:
|
||||
INST_NAME("OR Ed, Gd");
|
||||
SETFLAGS(X_ALL, SF_SET_PENDING);
|
||||
@ -1128,6 +1137,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
ZEROUP(ed);
|
||||
WBACK;
|
||||
break;
|
||||
case 3:
|
||||
INST_NAME("NEG Ed");
|
||||
SETFLAGS(X_ALL, SF_SET_PENDING);
|
||||
GETED(0);
|
||||
emit_neg32(dyn, ninst, rex, ed, x3, x4);
|
||||
WBACK;
|
||||
break;
|
||||
default:
|
||||
DEFAULT;
|
||||
}
|
||||
|
@ -261,6 +261,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
|
||||
LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
|
||||
break;
|
||||
case 0xAF:
|
||||
INST_NAME("IMUL Gd, Ed");
|
||||
SETFLAGS(X_ALL, SF_PENDING);
|
||||
nextop = F8;
|
||||
GETGD;
|
||||
GETED(0);
|
||||
if (box64_dynarec_test) {
|
||||
// avoid noise during test
|
||||
CLEAR_FLAGS(x3);
|
||||
}
|
||||
if (rex.w) {
|
||||
// 64bits imul
|
||||
UFLAG_IF {
|
||||
MULH_D(x3, gd, ed);
|
||||
MUL_D(gd, gd, ed);
|
||||
IFX (X_PEND) {
|
||||
UFLAG_OP1(x3);
|
||||
UFLAG_RES(gd);
|
||||
UFLAG_DF(x3, d_imul64);
|
||||
} else {
|
||||
SET_DFNONE();
|
||||
}
|
||||
IFX (X_CF | X_OF) {
|
||||
SRAI_D(x4, gd, 63);
|
||||
XOR(x3, x3, x4);
|
||||
SNEZ(x3, x3);
|
||||
IFX (X_CF) {
|
||||
BSTRINS_D(xFlags, x3, F_CF, F_CF);
|
||||
}
|
||||
IFX (X_OF) {
|
||||
BSTRINS_D(xFlags, x3, F_OF, F_OF);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MULxw(gd, gd, ed);
|
||||
}
|
||||
} else {
|
||||
// 32bits imul
|
||||
UFLAG_IF {
|
||||
MUL_D(gd, gd, ed);
|
||||
SRLI_D(x3, gd, 32);
|
||||
SLLI_W(gd, gd, 0);
|
||||
IFX (X_PEND) {
|
||||
UFLAG_RES(gd);
|
||||
UFLAG_OP1(x3);
|
||||
UFLAG_DF(x4, d_imul32);
|
||||
} else IFX (X_CF | X_OF) {
|
||||
SET_DFNONE();
|
||||
}
|
||||
IFX (X_CF | X_OF) {
|
||||
SRAI_W(x4, gd, 31);
|
||||
SUB_D(x3, x3, x4);
|
||||
SNEZ(x3, x3);
|
||||
IFX (X_CF) {
|
||||
BSTRINS_D(xFlags, x3, F_CF, F_CF);
|
||||
}
|
||||
IFX (X_OF) {
|
||||
BSTRINS_D(xFlags, x3, F_OF, F_OF);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MULxw(gd, gd, ed);
|
||||
}
|
||||
SLLI_D(gd, gd, 32);
|
||||
SRLI_D(gd, gd, 32);
|
||||
}
|
||||
break;
|
||||
case 0xB6:
|
||||
INST_NAME("MOVZX Gd, Eb");
|
||||
nextop = F8;
|
||||
|
@ -245,7 +245,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
|
||||
SET_DFNONE();
|
||||
}
|
||||
|
||||
IFXA(X_ALL, la64_lbt) {
|
||||
IFXA (X_ALL, la64_lbt) {
|
||||
if (rex.w)
|
||||
X64_OR_D(s1, s2);
|
||||
else
|
||||
@ -326,3 +326,40 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
|
||||
emit_pf(dyn, ninst, s1, s3, s4);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    // Deferred-flags bookkeeping: if flags are pending, record d_or8 as the
    // pending operation; otherwise, if any flag is needed, mark "no deferred op".
    IFX (X_PEND) {
        SET_DF(s3, d_or8);
    } else IFX (X_ALL) {
        SET_DFNONE();
    }

    // With the LBT extension, let the hardware track the x86 flags for this OR.
    IFXA (X_ALL, la64_lbt) {
        X64_OR_B(s1, s2);
    }

    // The actual OR (byte-width semantics; assumes upper bits of s1/s2 are
    // already clean per the GETEB/GETGB contract — TODO confirm).
    OR(s1, s1, s2);

    IFX (X_PEND) {
        // Save the result so deferred flag evaluation can use it later.
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }

    // LBT already produced the flags above; no software emulation needed.
    if (la64_lbt) return;

    // Software flag computation. OR clears CF/OF/AF; only SF/ZF/PF depend on
    // the result, so CLEAR_FLAGS then set the relevant bits conditionally.
    CLEAR_FLAGS(s3);
    IFX (X_SF) {
        SRLI_D(s3, s1, 7);              // isolate bit 7 = sign bit of 8-bit result
        BEQZ(s3, 8);                    // branch over the ORI when sign bit clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX (X_ZF) {
        BNEZ(s1, 8);                    // branch over the ORI when result non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX (X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
|
@ -39,7 +39,10 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
|
||||
|
||||
if (la64_lbt) {
|
||||
IFX(X_ALL) {
|
||||
X64_ADD_WU(s1, s2);
|
||||
if (rex.w)
|
||||
X64_ADD_DU(s1, s2);
|
||||
else
|
||||
X64_ADD_WU(s1, s2);
|
||||
}
|
||||
ADDxw(s1, s1, s2);
|
||||
if (!rex.w) ZEROUP(s1);
|
||||
@ -465,7 +468,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
|
||||
|
||||
if (la64_lbt) {
|
||||
IFX(X_ALL) {
|
||||
X64_SUB_WU(s1, s2);
|
||||
if (rex.w)
|
||||
X64_SUB_DU(s1, s2);
|
||||
else
|
||||
X64_SUB_WU(s1, s2);
|
||||
}
|
||||
SUBxw(s1, s1, s2);
|
||||
if (!rex.w) ZEROUP(s1);
|
||||
@ -577,3 +583,79 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
|
||||
emit_pf(dyn, ninst, s1, s3, s4);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
{
    // Deferred-flags bookkeeping: save the operand and the pending-op code
    // (d_neg64/d_neg32 by width), or mark "no deferred op" if flags are live.
    IFX (X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w ? d_neg64 : d_neg32);
    } else IFX (X_ALL) {
        SET_DFNONE();
    }

    // Software flag path needs the original operand later for AF/OF, so keep
    // a copy before s1 is overwritten by the negation.
    if (!la64_lbt) {
        IFX (X_AF | X_OF) {
            MV(s3, s1); // s3 = op1
        }
    }

    // With the LBT extension, compute the x86 flags of 0 - s1 in hardware.
    IFXA (X_ALL, la64_lbt) {
        if (rex.w)
            X64_SUB_DU(xZR, s1);
        else
            X64_SUB_WU(xZR, s1);
    }

    // The actual negation (width selected by rex.w inside NEGxw).
    NEGxw(s1, s1);
    IFX (X_PEND) {
        // Save the result for later deferred flag evaluation.
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }

    // LBT already produced the flags; just zero the upper 32 bits for the
    // 32-bit form and we are done.
    if (la64_lbt) {
        if (!rex.w) {
            ZEROUP(s1);
        }
        return;
    }

    // Software flag computation.
    CLEAR_FLAGS(s3);
    IFX (X_CF) {
        // NEG sets CF iff the source was non-zero; the result is zero iff the
        // source was zero, so testing the result is equivalent here.
        BEQZ(s1, 8);                    // branch over the ORI when result == 0
        ORI(xFlags, xFlags, 1 << F_CF);
    }

    IFX (X_AF | X_OF) {
        // Borrow-chain style computation for 0 - op1: bc = res | op1.
        OR(s3, s1, s3); // s3 = res | op1
        IFX (X_AF) {
            /* af = bc & 0x8 */
            ANDI(s2, s3, 8);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX (X_OF) {
            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
            SRLI_D(s2, s3, (rex.w ? 64 : 32) - 2);
            SRLI_D(s3, s2, 1);
            XOR(s2, s2, s3);
            ANDI(s2, s2, 1);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX (X_SF) {
        // Result still sign-extended at this point, so a signed compare with
        // zero gives the x86 sign flag for both widths.
        BGE(s1, xZR, 8);                // branch over the ORI when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    // 32-bit form: clear the upper half of the destination register.
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX (X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX (X_ZF) {
        BNEZ(s1, 8);                    // branch over the ORI when result non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
|
@ -441,6 +441,19 @@
|
||||
#ifndef SET_HASCALLRET
|
||||
#define SET_HASCALLRET()
|
||||
#endif
|
||||
// UFLAG_* helpers: store the operands / result / deferred-op code into the
// emu context for later flag evaluation, but only when pass analysis decided
// this instruction's flags may actually be consumed (x64.gen_flags);
// otherwise they emit nothing.

// Save first operand for deferred flags evaluation.
#define UFLAG_OP1(A) \
    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op1)); }
// Save second operand for deferred flags evaluation.
#define UFLAG_OP2(A) \
    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op2)); }
// Save both operands at once.
#define UFLAG_OP12(A1, A2) \
    if (dyn->insts[ninst].x64.gen_flags) { \
        SDxw(A1, xEmu, offsetof(x64emu_t, op1)); \
        SDxw(A2, xEmu, offsetof(x64emu_t, op2)); \
    }
// Save the result for deferred flags evaluation.
#define UFLAG_RES(A) \
    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, res)); }
// Record which deferred operation (d_*) produced the pending flags.
#define UFLAG_DF(r, A) \
    if (dyn->insts[ninst].x64.gen_flags) { SET_DF(r, A) }
// Guard a statement so it only runs when flags may be consumed.
#define UFLAG_IF if (dyn->insts[ninst].x64.gen_flags)
|
||||
#ifndef DEFAULT
|
||||
#define DEFAULT \
|
||||
@ -542,8 +555,10 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
|
||||
#define emit_sub32c STEPNAME(emit_sub32c)
|
||||
#define emit_sub8 STEPNAME(emit_sub8)
|
||||
#define emit_sub8c STEPNAME(emit_sub8c)
|
||||
#define emit_neg32 STEPNAME(emit_neg32)
|
||||
#define emit_or32 STEPNAME(emit_or32)
|
||||
#define emit_or32c STEPNAME(emit_or32c)
|
||||
#define emit_or8 STEPNAME(emit_or8)
|
||||
#define emit_xor32 STEPNAME(emit_xor32)
|
||||
#define emit_and8 STEPNAME(emit_and8)
|
||||
#define emit_and8c STEPNAME(emit_and8c)
|
||||
@ -601,8 +616,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
|
||||
void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
|
||||
void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
|
||||
void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
|
||||
void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
|
||||
void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
|
||||
void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
|
||||
void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
|
||||
void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
|
||||
void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
|
||||
void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
|
||||
|
@ -309,6 +309,76 @@ f24-f31 fs0-fs7 Static registers Callee
|
||||
|
||||
// Sign-extend the low 32 bits of rs1 into rd (SLLI.W writes a sign-extended
// 32-bit result, so a shift by 0 is a pure sign-extension).
#define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0)

// --- LoongArch64 multiply / divide / modulo, 3R-type encodings ---

// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = SignExtend(product[31:0], GRLEN)
#define MUL_W(rd, rj, rk) EMIT(type_3R(0b00000000000111000, rk, rj, rd))

// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = SignExtend(product[63:32], GRLEN)
#define MULH_W(rd, rj, rk) EMIT(type_3R(0b00000000000111001, rk, rj, rd))

// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(product[63:32], GRLEN)
#define MULH_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111010, rk, rj, rd))

// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
// GR[rd] = product[63:0]
#define MUL_D(rd, rj, rk) EMIT(type_3R(0b00000000000111011, rk, rj, rd))

// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
// GR[rd] = product[127:64]
#define MULH_D(rd, rj, rk) EMIT(type_3R(0b00000000000111100, rk, rj, rd))

// product = unsigned(GR[rj][63:0]) * unsigned(GR[rk][63:0])
// GR[rd] = product[127:64]
#define MULH_DU(rd, rj, rk) EMIT(type_3R(0b00000000000111101, rk, rj, rd))

// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
// GR[rd] = product[63:0]
#define MULW_D_W(rd, rj, rk) EMIT(type_3R(0b00000000000111110, rk, rj, rd))

// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
// GR[rd] = product[63:0]
#define MULW_D_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111111, rk, rj, rd))

// quotient = signed(GR[rj][31:0]) / signed(GR[rk][31:0])
// GR[rd] = SignExtend(quotient[31:0], GRLEN)
#define DIV_W(rd, rj, rk) EMIT(type_3R(0b00000000001000000, rk, rj, rd))

// quotient = unsigned(GR[rj][31:0]) / unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(quotient[31:0], GRLEN)
#define DIV_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000010, rk, rj, rd))

// remainder = signed(GR[rj][31:0]) % signed(GR[rk][31:0])
// GR[rd] = SignExtend(remainder[31:0], GRLEN)
#define MOD_W(rd, rj, rk) EMIT(type_3R(0b00000000001000001, rk, rj, rd))

// remainder = unsigned(GR[rj][31:0]) % unsigned(GR[rk][31:0])
// GR[rd] = SignExtend(remainder[31:0], GRLEN)
#define MOD_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000011, rk, rj, rd))

// GR[rd] = signed(GR[rj][63:0]) / signed(GR[rk][63:0])
#define DIV_D(rd, rj, rk) EMIT(type_3R(0b00000000001000100, rk, rj, rd))

// GR[rd] = unsigned(GR[rj][63:0]) / unsigned(GR[rk][63:0])
#define DIV_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000110, rk, rj, rd))

// GR[rd] = signed(GR[rj] [63:0]) % signed(GR[rk] [63:0])
#define MOD_D(rd, rj, rk) EMIT(type_3R(0b00000000001000101, rk, rj, rd))

// GR[rd] = unsigned(GR[rj] [63:0]) % unsigned(GR[rk] [63:0])
#define MOD_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000111, rk, rj, rd))

// Width-dependent multiply: MUL.D for the 64-bit form (rex.w), else MUL.W
// (32-bit multiply, result sign-extended). Relies on a `rex` variable being
// in scope at the expansion site.
#define MULxw(rd, rj, rk)     \
    do {                      \
        if (rex.w) {          \
            MUL_D(rd, rj, rk); \
        } else {              \
            MUL_W(rd, rj, rk); \
        }                     \
    } while (0)
|
||||
|
||||
|
||||
// bstr32[31:msbw+1] = GR[rd][31: msbw+1]
|
||||
// bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0]
|
||||
// bstr32[lsbw-1:0] = GR[rd][lsbw-1:0]
|
||||
@ -1709,6 +1779,8 @@ LSX instruction starts with V, LASX instruction starts with XV.
|
||||
SUB_W(rd, rj, rk); \
|
||||
} while (0)
|
||||
|
||||
// Width-dependent negate: rd = 0 - rs1 via SUBxw from the zero register
// (32- or 64-bit form selected by rex.w inside SUBxw).
#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1)
|
||||
|
||||
#define SUBz(rd, rj, rk) \
|
||||
do { \
|
||||
if (rex.is32bits) \
|
||||
|
@ -1445,6 +1445,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
}
|
||||
break;
|
||||
case 0xAF:
|
||||
// TODO: Refine this
|
||||
INST_NAME("IMUL Gd, Ed");
|
||||
SETFLAGS(X_ALL, SF_PENDING);
|
||||
nextop = F8;
|
||||
|
Loading…
Reference in New Issue
Block a user