[RV64_DYNAREC] Made eflags emulation branchless with xtheadcondmov (#2019)
Some checks failed
Build and Release Box64 / build (ubuntu-latest, ANDROID, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, ANDROID, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, ARM64, Box32) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, ARM64, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, ARM64, StaticBuild) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, ARM64, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, LARCH64, Box32) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, LARCH64, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, LARCH64, StaticBuild) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, LARCH64, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RISCV, Box32) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RISCV, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RISCV, StaticBuild) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RISCV, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RK3588, Box32) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RK3588, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RK3588, StaticBuild) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, RK3588, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, TERMUX, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, TERMUX, Trace) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, X64, Box32) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, X64, Release) (push) Has been cancelled
Build and Release Box64 / build (ubuntu-latest, X64, Trace) (push) Has been cancelled

* [RV64_DYNAREC] Made eflags emulation branchless with xtheadcondmov

* more
This commit is contained in:
Yang Liu 2024-11-12 02:53:55 +08:00 committed by GitHub
parent 56e813ccb7
commit b02942c0b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 299 additions and 447 deletions

View File

@ -563,6 +563,9 @@ HWCAP2_AFP
} else if (p != NULL && !strcasecmp(p, "xtheadmempair")) {
RV64_Detect_Function();
rv64_xtheadmempair = 0;
} else if (p != NULL && !strcasecmp(p, "xtheadcondmov")) {
RV64_Detect_Function();
rv64_xtheadcondmov = 0;
}
printf_log(LOG_INFO, "Dynarec for RISC-V ");
@ -577,8 +580,8 @@ HWCAP2_AFP
if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb");
if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs");
if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
if (rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
// Disable the display since these are only detected but never used.
// if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
// if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
// if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
// if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");

View File

@ -58,7 +58,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
FAST_8BIT_OPERATION(wb, gb, x1, ADD(wb, wb, x1));
GETEB(x1, 0);
GETGB(x2);
emit_add8(dyn, ninst, x1, x2, x4, x5);
emit_add8(dyn, ninst, x1, x2, x4, x5, x6);
EBBACK(x5, 0);
break;
case 0x01:
@ -77,7 +77,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
GETEB(x1, 0);
GETGB(x2);
emit_add8(dyn, ninst, x2, x1, x4, x5);
emit_add8(dyn, ninst, x2, x1, x4, x5, x6);
GBBACK(x5);
break;
case 0x03:
@ -93,7 +93,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SETFLAGS(X_ALL, SF_SET_PENDING);
u8 = F8;
ANDI(x1, xRAX, 0xff);
emit_add8c(dyn, ninst, x1, u8, x3, x4, x5);
emit_add8c(dyn, ninst, x1, u8, x3, x4, x5, x6);
ANDI(xRAX, xRAX, ~0xff);
OR(xRAX, xRAX, x1);
break;

View File

@ -58,7 +58,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEB(x1, 1);
u8 = F8;
emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
EBBACK(x5, 0);
break;
case 1: // OR

View File

@ -1105,7 +1105,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
INST_NAME("NEG Eb");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEB(x1, 0);
emit_neg8(dyn, ninst, x1, x2, x4);
emit_neg8(dyn, ninst, x1, x2, x4, x5);
EBBACK(x5, 0);
break;
case 4:
@ -1175,7 +1175,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
INST_NAME("NEG Ed");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETED(0);
emit_neg32(dyn, ninst, rex, ed, x3, x4);
emit_neg32(dyn, ninst, rex, ed, x3, x4, x5, x6);
WBACK;
break;
case 4:

View File

@ -1762,7 +1762,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETED(1);
GETGD;
u8 = F8;
emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
WBACK;
} else {
FAKEED;
@ -1824,7 +1824,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETGD;
u8 = F8;
u8 &= (rex.w ? 0x3f : 0x1f);
emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
WBACK;
} else {
FAKEED;
@ -2251,7 +2251,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETGB(x2);
if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
MV(x9, ed);
emit_add8(dyn, ninst, ed, gd, x4, x5);
emit_add8(dyn, ninst, ed, gd, x4, x5, x6);
if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
MV(gd, x9);
EBBACK(x5, 0);

View File

@ -165,7 +165,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEBO(x1, 1);
u8 = F8;
emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
EBBACK(x5, 0);
break;
case 1: // OR

View File

@ -378,7 +378,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
gd = xRAX + (opcode&7);
ZEXTH(x1, gd);
emit_inc16(dyn, ninst, x1, x2, x3, x4);
emit_inc16(dyn, ninst, x1, x2, x3, x4, x5);
INSHz(gd, x1, x3, x4, 1, 0);
break;
case 0x48:
@ -1196,7 +1196,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
INST_NAME("NEG Ew");
SETFLAGS(X_ALL, SF_SET_PENDING);
GETEW(x1, 0);
emit_neg16(dyn, ninst, ed, x2, x4);
emit_neg16(dyn, ninst, ed, x2, x4, x5);
EWBACK;
break;
case 4:
@ -1289,7 +1289,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
INST_NAME("INC Ew");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
GETEW(x1, 0);
emit_inc16(dyn, ninst, x1, x2, x4, x5);
emit_inc16(dyn, ninst, x1, x2, x4, x5, x6);
EWBACK;
break;
case 1:

View File

@ -78,7 +78,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
GETEB32(x2, 0);
GETGB(x1);
emit_add8(dyn, ninst, x1, x2, x3, x4);
emit_add8(dyn, ninst, x1, x2, x3, x4, x6);
GBBACK(x4);
break;
case 0x03:

View File

@ -34,15 +34,13 @@ void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
ANDI(s1, s1, 0xff);
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PEND) {
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
IFX (X_ZF) {
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -62,15 +60,13 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
ANDI(s1, s1, 0xff);
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PEND) {
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -92,8 +88,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
// test sign bit before zeroup.
IFX(X_SF) {
if (!rex.w) SEXT_W(s1, s1);
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w && s1!=s2) {
ZEROUP(s1);
@ -104,8 +99,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -132,8 +126,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
// test sign bit before zeroup.
IFX(X_SF) {
if (!rex.w) SEXT_W(s1, s1);
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -144,8 +137,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -170,13 +162,11 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
IFX(X_ZF | X_SF) {
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
}
IFX(X_PF) {
@ -201,13 +191,11 @@ void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) {
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -229,8 +217,7 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
// test sign bit before zeroup.
IFX(X_SF) {
if (!rex.w) SEXT_W(s1, s1);
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -241,8 +228,7 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -268,8 +254,7 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
// test sign bit before zeroup.
IFX(X_SF) {
if (!rex.w) SEXT_W(s1, s1);
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -280,8 +265,7 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -307,12 +291,10 @@ void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -337,12 +319,10 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -364,12 +344,10 @@ void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -394,12 +372,10 @@ void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
IFX(X_SF) {
SRLI(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -429,12 +405,10 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
}
IFX(X_SF) {
SRLI(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -458,12 +432,10 @@ void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);

View File

@ -35,31 +35,31 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
IFX(X_CF) {
if (rex.w) {
AND(s5, xMASK, s1);
if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
if (rv64_zba) // lo
ADDUW(s5, s2, s5);
else {
AND(s4, xMASK, s2);
ADD(s5, s5, s4);
}
SRLI(s3, s1, 0x20);
SRLI(s4, s2, 0x20);
ADD(s4, s4, s3);
SRLI(s5, s5, 0x20);
ADD(s5, s5, s4); // hi
SRAI(s5, s5, 0x20);
BEQZ(s5, 8);
ORI(xFlags, xFlags, 1 << F_CF);
} else {
AND(s3, s1, xMASK);
AND(s4, s2, xMASK);
ADD(s5, s3, s4);
SRLI(s5, s5, 0x20);
BEQZ(s5, 8);
ORI(xFlags, xFlags, 1 << F_CF);
}
SET_FLAGS_NEZ(s5, F_CF, s4);
}
IFX(X_AF | X_OF) {
OR(s3, s1, s2); // s3 = op1 | op2
AND(s4, s1, s2); // s4 = op1 & op2
AND(s4, s1, s2); // s4 = op1 & op2
}
ADDxw(s1, s1, s2);
IFX(X_PEND) {
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
@ -73,21 +73,18 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, rex.w?62:30);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -96,8 +93,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
emit_pf(dyn, ninst, s1, s3, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -131,23 +127,25 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
IFX(X_CF) {
if (rex.w) {
AND(s5, xMASK, s1);
if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
if (rv64_zba) // lo
ADDUW(s5, s2, s5);
else {
AND(s4, xMASK, s2);
ADD(s5, s5, s4);
}
SRLI(s3, s1, 0x20);
SRLI(s4, s2, 0x20);
ADD(s4, s4, s3);
SRLI(s5, s5, 0x20);
ADD(s5, s5, s4); // hi
SRAI(s5, s5, 0x20);
BEQZ(s5, 8);
ORI(xFlags, xFlags, 1 << F_CF);
} else {
AND(s3, s1, xMASK);
AND(s4, s2, xMASK);
ADD(s5, s3, s4);
SRLI(s5, s5, 0x20);
BEQZ(s5, 8);
ORI(xFlags, xFlags, 1 << F_CF);
}
SET_FLAGS_NEZ(s5, F_CF, s4);
}
IFX(X_AF | X_OF) {
OR(s3, s1, s2); // s3 = op1 | op2
@ -174,21 +172,18 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, rex.w?62:30);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -197,8 +192,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
emit_pf(dyn, ninst, s1, s3, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -232,35 +226,30 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 14);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_CF) {
SRLI(s3, s1, 16);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s3, F_CF, s4);
}
ZEXTH(s1, s1);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -268,7 +257,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
// emit ADD8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
CLEAR_FLAGS();
IFX(X_PEND) {
@ -294,35 +283,30 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 6);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_CF) {
SRLI(s3, s1, 8);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s3, F_CF, s4);
}
IFX(X_PEND) {
SH(s1, xEmu, offsetof(x64emu_t, res));
}
ANDI(s1, s1, 0xff);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -330,7 +314,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
}
// emit ADD8 instruction, from s1, const c, store result in s1 using s3, s4 and s5 as scratch
void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4)
void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
{
CLEAR_FLAGS();
IFX(X_PEND) {
@ -358,35 +342,30 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 6);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_CF) {
SRLI(s3, s1, 8);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s3, F_CF, s4);
}
IFX(X_PEND) {
SH(s1, xEmu, offsetof(x64emu_t, res));
}
ANDI(s1, s1, 0xff);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -414,16 +393,14 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
ANDI(s1, s1, 0xff);
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PEND) {
SB(s1, xEmu, offsetof(x64emu_t, res));
}
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -460,15 +437,13 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
SLLI(s1, s1, 48);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, 48);
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -498,16 +473,14 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
}
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -564,16 +537,14 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
}
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -612,30 +583,26 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s2, s3, 0x08); // AF: cc & 0x08
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
SRLI(s3, s3, 6);
SRLI(s2, s3, 1);
XOR(s3, s3, s2);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s4);
}
}
IFX(X_SF) {
ANDI(s2, s1, 0x80);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s2, F_SF, s4);
}
ANDI(s1, s1, 0xff);
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -668,30 +635,26 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
OR(s3, s3, s4); // cc = (res & (~op1 | op2)) | (~op1 & op2)
IFX(X_AF) {
ANDI(s2, s3, 0x08); // AF: cc & 0x08
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
SRLI(s3, s3, 6);
SRLI(s2, s3, 1);
XOR(s3, s3, s2);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s4);
}
}
IFX(X_SF) {
ANDI(s2, s1, 0x80);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s2, F_SF, s4);
}
ANDI(s1, s1, 0xff);
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -727,21 +690,18 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s2, s3, 0x08); // AF: cc & 0x08
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
SRLI(s3, s3, rex.w?62:30);
SRLI(s2, s3, 1);
XOR(s3, s3, s2);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s4);
}
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -750,8 +710,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -783,21 +742,18 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
OR(s3, s3, s5); // cc = (res & (~op1 | op2)) | (~op1 & op2)
IFX(X_AF) {
ANDI(s2, s3, 0x08); // AF: cc & 0x08
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
SRLI(s3, s3, rex.w?62:30);
SRLI(s2, s3, 1);
XOR(s3, s3, s2);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s4);
}
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -806,13 +762,12 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
// emit INC16 instruction, from s1, store result in s1 using s3, s4 and s5 as scratch
void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
IFX(X_ALL) {
ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));
@ -843,29 +798,25 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 14);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
ZEXTH(s1, s1);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -900,30 +851,26 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
OR(s3, s3, s5); // cc = (res & (~op1 | op2)) | (~op1 & op2)
IFX(X_AF) {
ANDI(s2, s3, 0x08); // AF: cc & 0x08
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 14);
SRLI(s2, s3, 1);
XOR(s3, s3, s2);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
SLLIW(s1, s1, 16);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
SRLIW(s1, s1, 16);
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
}
@ -956,12 +903,10 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1000,34 +945,29 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 6);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_CF) {
SRLI(s3, s1, 8);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s3, F_CF, s5);
}
ANDI(s1, s1, 0xff);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1070,8 +1010,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
CLEAR_FLAGS();
SLLIW(s1, s1, 16);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
SRLIW(s1, s1, 16);
@ -1081,8 +1020,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1111,8 +1049,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
CLEAR_FLAGS();
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -1124,8 +1061,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1133,7 +1069,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
// emit NEG32 instruction, from s1, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
CLEAR_FLAGS();
IFX(X_PEND) {
@ -1152,8 +1088,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
IFX(X_CF) {
BEQZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s1, F_CF, s4);
}
IFX(X_AF | X_OF) {
@ -1161,8 +1096,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
IFX(X_AF) {
/* af = bc & 0x8 */
ANDI(s2, s3, 8);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
/* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@ -1170,13 +1104,11 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SRLI(s3, s2, 1);
XOR(s2, s2, s3);
ANDI(s2, s2, 1);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s2, F_OF2, s4);
}
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s4, s5);
}
if (!rex.w) {
ZEROUP(s1);
@ -1185,13 +1117,12 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
// emit NEG16 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
CLEAR_FLAGS();
IFX(X_PEND) {
@ -1211,8 +1142,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
}
IFX(X_CF) {
BEQZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s1, F_CF, s4);
}
IFX(X_AF | X_OF) {
@ -1220,8 +1150,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
IFX(X_AF) {
/* af = bc & 0x8 */
ANDI(s2, s3, 8);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
/* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@ -1229,8 +1158,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
SRLI(s3, s2, 1);
XOR(s2, s2, s3);
ANDI(s2, s2, 1);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s2, F_OF2, s4);
}
}
IFX(X_SF) {
@ -1242,13 +1170,12 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
// emit NEG8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
CLEAR_FLAGS();
IFX(X_PEND) {
@ -1268,8 +1195,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
}
IFX(X_CF) {
BEQZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s1, F_CF, s4);
}
IFX(X_AF | X_OF) {
@ -1277,8 +1203,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
IFX(X_AF) {
/* af = bc & 0x8 */
ANDI(s2, s3, 8);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s2, F_AF, s4);
}
IFX(X_OF) {
/* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@ -1286,8 +1211,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
SRLI(s3, s2, 1);
XOR(s2, s2, s3);
ANDI(s2, s2, 1);
BEQZ(s2, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s2, F_OF2, s4);
}
}
IFX(X_SF) {
@ -1298,8 +1222,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
emit_pf(dyn, ninst, s1, s3, s2);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
}
@ -1336,34 +1259,29 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, 14);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX(X_CF) {
SRLI(s3, s1, 16);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_CF);
SET_FLAGS_NEZ(s3, F_CF, s5);
}
ZEXTH(s1, s1);
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1414,10 +1332,6 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
IFX(X_PEND) {
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_CF) {
BEQZ(s6, 8);
ORI(xFlags, xFlags, 1 << F_CF);
}
IFX(X_AF | X_OF) {
if(rv64_zbb) {
ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2)
@ -1428,21 +1342,21 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2)
IFX(X_AF) {
ANDI(s4, s3, 0x08); // AF: cc & 0x08
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_AF);
SET_FLAGS_NEZ(s4, F_AF, s5);
}
IFX(X_OF) {
SRLI(s3, s3, rex.w?62:30);
SRLI(s4, s3, 1);
XOR(s3, s3, s4);
ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_OF2);
SET_FLAGS_NEZ(s3, F_OF2, s5);
}
}
IFX (X_CF) {
SET_FLAGS_NEZ(s6, F_CF, s5);
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s5, s6);
}
if (!rex.w) {
ZEROUP(s1);
@ -1451,7 +1365,6 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
emit_pf(dyn, ninst, s1, s3, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
}

View File

@ -48,8 +48,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SLLI(s1, s1, c+56);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, 56);
@ -57,8 +56,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -139,8 +137,7 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -175,8 +172,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
// OF nop
IFX(X_SF) {
// SF is the same as the original operand
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, c);
@ -186,8 +182,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -219,8 +214,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
SLLI(s1, s1, 56);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, 56);
@ -228,8 +222,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -284,8 +277,7 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -316,8 +308,7 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
// OF nop
IFX(X_SF) {
// SF is the same as the original operand
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRL(s1, s1, s2);
@ -327,8 +318,7 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
SB(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -362,8 +352,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SLLI(s1, s1, c+48);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, 48);
@ -371,8 +360,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -452,8 +440,7 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -488,8 +475,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
// OF nop
IFX(X_SF) {
// SF is the same as the original operand
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, c);
@ -499,8 +485,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -533,8 +518,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
SLLI(s1, s1, 48);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRLI(s1, s1, 48);
@ -542,8 +526,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -598,8 +581,7 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -630,8 +612,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
// OF nop
IFX(X_SF) {
// SF is the same as the original operand
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
SRL(s1, s1, s2);
@ -641,8 +622,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -679,8 +659,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -689,8 +668,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -738,8 +716,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -748,8 +725,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_OF) {
// OF flag is affected only on 1-bit shifts
@ -799,8 +775,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SRL(s1, s1, s2);
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -809,8 +784,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -864,8 +838,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
}
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w && c == 0) {
ZEROUP(s1);
@ -874,8 +847,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -923,8 +895,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
// SRAIW sign-extends, so test sign bit before clearing upper bits
IFX(X_SF) {
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s1);
@ -933,8 +904,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -1148,7 +1118,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
}
// emit SHRD32 instruction, from s1, fill s2, constant c, store result in s1 using s3, s4 and s5 as scratch
void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
{
c&=(rex.w?0x3f:0x1f);
CLEAR_FLAGS();
@ -1193,15 +1163,13 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
}
IFX(X_SF) {
SRLIxw(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PEND) {
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_OF) {
// the OF flag is set if a sign change occurred
@ -1266,18 +1234,11 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
OR(s1, s1, s5);
}
ZEXTH(s1, s1);
IFX(X_SF) {
SLLIW(s3, s1, 16);
BGE(s3, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_PEND) {
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_OF) {
// the OF flag is set if a sign change occurred
@ -1289,12 +1250,16 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
OR(xFlags, xFlags, s3);
}
}
IFX (X_SF) {
SLLIW(s3, s1, 16);
SET_FLAGS_LTZ(s3, F_SF, s4, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
}
void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
{
c&=(rex.w?0x3f:0x1f);
CLEAR_FLAGS();
@ -1337,17 +1302,11 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
if (!rex.w) {
ZEROUP(s1);
}
IFX(X_SF) {
SRLIxw(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_PEND) {
SDxw(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX(X_OF) {
// the OF flag is set if a sign change occurred
@ -1358,6 +1317,10 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
ORI(xFlags, xFlags, s3);
}
}
IFX (X_SF) {
SRLIxw(s3, s1, rex.w ? 63 : 31);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
@ -1397,15 +1360,6 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
if (!rex.w) {
ZEROUP(s1);
}
IFX(X_SF) {
SRLIxw(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX(X_OF) {
ADDI(s5, s5, -1);
BNEZ_MARK(s5);
@ -1415,6 +1369,13 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
OR(xFlags, xFlags, s3);
MARK;
}
IFX (X_ZF) {
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX (X_SF) {
SRLIxw(s3, s1, rex.w ? 63 : 31);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
@ -1453,15 +1414,6 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
if (!rex.w) {
ZEROUP(s1);
}
IFX(X_SF) {
SRLIxw(s3, s1, rex.w?63:31);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX(X_OF) {
ADDI(s5, s5, -1);
BNEZ_MARK(s5);
@ -1471,6 +1423,13 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
OR(xFlags, xFlags, s3);
MARK;
}
IFX (X_ZF) {
SET_FLAGS_EQZ(s1, F_ZF, s5);
}
IFX (X_SF) {
SRLIxw(s3, s1, rex.w ? 63 : 31);
SET_FLAGS_NEZ(s3, F_SF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}
@ -1524,19 +1483,9 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
OR(s1, s1, s3);
}
ZEXTH(s1, s1);
IFX(X_SF) {
SLLIW(s4, s1, 16);
BGE(s4, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_PEND) {
SH(s1, xEmu, offsetof(x64emu_t, res));
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX(X_OF) {
// the OF flag is set if a sign change occurred
if(c==1) {
@ -1547,6 +1496,13 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
OR(xFlags, xFlags, s3);
}
}
IFX (X_SF) {
SLLIW(s4, s1, 16);
SET_FLAGS_LTZ(s4, F_SF, s3, s5);
}
IFX (X_ZF) {
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
}

View File

@ -46,13 +46,11 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
}
IFX(X_SF) {
SRLI(s3, s6, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8);
IFX(X_ZF) {
BNEZ(s6, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s6, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s6, s3, s4);
@ -74,12 +72,10 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
IFX(X_SF) {
SRLI(s3, s1, 7);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -113,13 +109,11 @@ void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
IFX(X_SF) {
SRLI(s3, s6, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 16);
IFX(X_ZF) {
BNEZ(s6, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s6, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s6, s3, s4);
@ -141,12 +135,10 @@ void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
IFX(X_SF) {
SRLI(s3, s1, 15);
BEQZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -176,16 +168,14 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
SDxw(s6, xEmu, offsetof(x64emu_t, res));
}
IFX(X_SF) {
BGE(s6, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_LTZ(s6, F_SF, s3, s4);
}
if (!rex.w) {
ZEROUP(s6);
}
CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32);
IFX(X_ZF) {
BNEZ(s6, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s6, F_ZF, s4);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s6, s3, s4);
@ -206,16 +196,14 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
}
IFX(X_SF) {
if (rex.w) {
BGE(s1, xZR, 8);
SET_FLAGS_LTZ(s1, F_SF, s3, s4);
} else {
SRLI(s3, s1, 31);
BEQZ(s3, 8);
SET_FLAGS_NEZ(s3, F_SF, s4);
}
ORI(xFlags, xFlags, 1 << F_SF);
}
IFX(X_ZF) {
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s1, F_ZF, s3);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s1, s3, s4);
@ -238,12 +226,10 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
IFX(X_SF) {
SRLI(s4, s3, 7);
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s4, F_SF, s5);
}
IFX(X_ZF) {
BNEZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s3, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s3, s4, s5);
@ -267,12 +253,10 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
}
IFX(X_SF) {
SRLI(s4, s3, 15);
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SET_FLAGS_NEZ(s4, F_SF, s5);
}
IFX(X_ZF) {
BNEZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s3, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s3, s4, s5);
@ -298,13 +282,11 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
if (!rex.w) ZEROUP(s3);
}
IFX(X_SF) {
SRLI(s4, s3, rex.w?63:31);
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SRLI(s4, s3, rex.w ? 63 : 31);
SET_FLAGS_NEZ(s4, F_SF, s5);
}
IFX(X_ZF) {
BNEZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s3, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s3, s4, s5);
@ -335,13 +317,11 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c,
SDxw(s3, xEmu, offsetof(x64emu_t, res));
}
IFX(X_SF) {
SRLI(s4, s3, rex.w?63:31);
BEQZ(s4, 8);
ORI(xFlags, xFlags, 1 << F_SF);
SRLI(s4, s3, rex.w ? 63 : 31);
SET_FLAGS_NEZ(s4, F_SF, s5);
}
IFX(X_ZF) {
BNEZ(s3, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
SET_FLAGS_EQZ(s3, F_ZF, s5);
}
IFX(X_PF) {
emit_pf(dyn, ninst, s3, s4, s5);

View File

@ -898,43 +898,71 @@
// Clear the AF, CF, OF2, ZF, SF and PF bits of xFlags with a single ANDI.
// The ANDI is only emitted inside the IFX(X_ALL) guard.
#define CLEAR_FLAGS()                                                  \
    IFX(X_ALL) {                                                       \
        ANDI(xFlags, xFlags,                                           \
            ~((1UL << F_CF) | (1UL << F_PF) | (1UL << F_AF)            \
                | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_OF2)));    \
    }
// Set flag bit F in xFlags when `reg` is non-zero; otherwise xFlags is left as is.
//   reg     - register holding the value to test
//   F       - bit index of the flag; the emitted immediate is 1 << (F)
//   scratch - register written on the rv64_xtheadcondmov path only
// With rv64_xtheadcondmov the sequence is branchless: xFlags | (1 << (F)) is
// built in scratch and TH_MVNEZ is emitted with (xFlags, scratch, reg).
// Otherwise BEQZ(reg, 8) is emitted in front of the ORI; the offset of 8
// presumably skips exactly that one ORI, so nothing may be inserted between them.
#define SET_FLAGS_NEZ(reg, F, scratch)          \
    do {                                        \
        if (rv64_xtheadcondmov) {               \
            ORI(scratch, xFlags, 1 << (F));     \
            TH_MVNEZ(xFlags, scratch, reg);     \
        } else {                                \
            BEQZ(reg, 8);                       \
            ORI(xFlags, xFlags, 1 << (F));      \
        }                                       \
    } while (0)
// Set flag bit F in xFlags when `reg` is zero; otherwise xFlags is left as is.
//   reg     - register holding the value to test
//   F       - bit index of the flag; the emitted immediate is 1 << (F)
//   scratch - register written on the rv64_xtheadcondmov path only
// With rv64_xtheadcondmov the sequence is branchless: xFlags | (1 << (F)) is
// built in scratch and TH_MVEQZ is emitted with (xFlags, scratch, reg).
// Otherwise BNEZ(reg, 8) is emitted in front of the ORI; the offset of 8
// presumably skips exactly that one ORI, so nothing may be inserted between them.
#define SET_FLAGS_EQZ(reg, F, scratch)          \
    do {                                        \
        if (rv64_xtheadcondmov) {               \
            ORI(scratch, xFlags, 1 << (F));     \
            TH_MVEQZ(xFlags, scratch, reg);     \
        } else {                                \
            BNEZ(reg, 8);                       \
            ORI(xFlags, xFlags, 1 << (F));      \
        }                                       \
    } while (0)
// Set flag bit F in xFlags when `reg` is negative (signed compare against xZR);
// otherwise xFlags is left as is.
//   reg      - register holding the value to test
//   F        - bit index of the flag; the emitted immediate is 1 << (F)
//   scratch1 - written on the rv64_xtheadcondmov path only (receives SLT(reg, xZR))
//   scratch2 - written on the rv64_xtheadcondmov path only (receives xFlags | bit)
// With rv64_xtheadcondmov the sequence is branchless: SLT produces the
// "reg < 0" condition in scratch1, the candidate flags go to scratch2, and
// TH_MVNEZ is emitted with (xFlags, scratch2, scratch1).
// Otherwise BGE(reg, xZR, 8) is emitted in front of the ORI; the offset of 8
// presumably skips exactly that one ORI, so nothing may be inserted between them.
#define SET_FLAGS_LTZ(reg, F, scratch1, scratch2)   \
    do {                                            \
        if (rv64_xtheadcondmov) {                   \
            SLT(scratch1, reg, xZR);                \
            ORI(scratch2, xFlags, 1 << (F));        \
            TH_MVNEZ(xFlags, scratch2, scratch1);   \
        } else {                                    \
            BGE(reg, xZR, 8);                       \
            ORI(xFlags, xFlags, 1 << (F));          \
        }                                           \
    } while (0)
// might use op1_ as scratch
#define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width) \
IFX(X_AF | X_CF | X_OF) \
{ \
IFX (X_AF | X_CF | X_OF) { \
/* calc borrow chain */ \
/* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \
OR(scratch1, op1_, op2); \
AND(scratch2, res, scratch1); \
AND(op1_, op1_, op2); \
OR(scratch2, scratch2, op1_); \
IFX(X_AF) \
{ \
IFX (X_AF) { \
/* af = bc & 0x8 */ \
ANDI(scratch1, scratch2, 8); \
BEQZ(scratch1, 8); \
ORI(xFlags, xFlags, 1 << F_AF); \
SET_FLAGS_NEZ(scratch1, F_AF, op1_); \
} \
IFX(X_CF) \
{ \
IFX (X_CF) { \
/* cf = bc & (1<<(width-1)) */ \
if ((width) == 8) { \
ANDI(scratch1, scratch2, 0x80); \
} else { \
SRLI(scratch1, scratch2, (width)-1); \
SRLI(scratch1, scratch2, (width) - 1); \
if ((width) != 64) ANDI(scratch1, scratch1, 1); \
} \
BEQZ(scratch1, 8); \
ORI(xFlags, xFlags, 1 << F_CF); \
SET_FLAGS_NEZ(scratch1, F_CF, op1_); \
} \
IFX(X_OF) \
{ \
IFX (X_OF) { \
/* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \
SRLI(scratch1, scratch2, (width)-2); \
SRLI(scratch1, scratch2, (width) - 2); \
SRLI(scratch2, scratch1, 1); \
XOR(scratch1, scratch1, scratch2); \
ANDI(scratch1, scratch1, 1); \
BEQZ(scratch1, 8); \
ORI(xFlags, xFlags, 1 << F_OF2); \
SET_FLAGS_NEZ(scratch1, F_OF2, op1_); \
} \
}
@ -1367,8 +1395,8 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@ -1396,7 +1424,7 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
// void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@ -1413,9 +1441,9 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6);
void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
// void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
@ -1437,8 +1465,8 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);