[ARM64_DYNAREC] Improved stability of RCL/RCR and added 32/64-bit constant-count emitters

This commit is contained in:
ptitSeb 2024-05-17 14:29:58 +02:00
parent 04e960b0da
commit d7127ccc3d
6 changed files with 122 additions and 104 deletions

View File

@ -2074,27 +2074,43 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
case 2:
INST_NAME("RCL Ed, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
SETFLAGS(X_OF|X_CF, SF_SET_DF);
GETEDW(x4, x1, 1);
u8 = F8;
MOV32w(x2, u8);
CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
WBACK;
u8 = geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20));
if(u8) {
READFLAGS(X_CF);
SETFLAGS(X_CF|X_OF, SF_SUBSET); // removed PENDING on purpose
GETED(1);
u8 = (F8)&(rex.w?0x3f:0x1f);
emit_rcl32c(dyn, ninst, rex, ed, u8, x3, x4);
WBACK;
} else {
if(MODREG && ! rex.w && !rex.is32bits) {
GETED(1);
MOVw_REG(ed, ed);
} else {
FAKEED;
}
F8;
}
break;
case 3:
INST_NAME("RCR Ed, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
SETFLAGS(X_OF|X_CF, SF_SET_DF);
GETEDW(x4, x1, 1);
u8 = F8;
MOV32w(x2, u8);
CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
WBACK;
u8 = geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20));
if(u8) {
READFLAGS(X_CF);
SETFLAGS(X_CF|X_OF, SF_SUBSET); // removed PENDING on purpose
GETED(1);
u8 = (F8)&(rex.w?0x3f:0x1f);
emit_rcr32c(dyn, ninst, rex, ed, u8, x3, x4);
WBACK;
} else {
if(MODREG && ! rex.w && !rex.is32bits) {
GETED(1);
MOVw_REG(ed, ed);
} else {
FAKEED;
}
F8;
}
break;
case 4:
case 6:
@ -2435,7 +2451,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
case 2:
INST_NAME("RCL Eb, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEB(x1, 0);
emit_rcl8c(dyn, ninst, ed, 1, x4, x5);
EBBACK;
@ -2443,7 +2459,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
case 3:
INST_NAME("RCR Eb, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEB(x1, 0);
emit_rcr8c(dyn, ninst, ed, 1, x4, x5);
EBBACK;
@ -2491,22 +2507,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
case 2:
INST_NAME("RCL Ed, 1");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOV32w(x2, 1);
GETEDW(x4, x1, 0);
CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETED(0);
emit_rcl32c(dyn, ninst, rex, ed, 1, x3, x4);
WBACK;
break;
case 3:
INST_NAME("RCR Ed, 1");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOV32w(x2, 1);
GETEDW(x4, x1, 0);
CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETED(0);
emit_rcr32c(dyn, ninst, rex, ed, 1, x3, x4);
WBACK;
break;
case 4:

View File

@ -976,7 +976,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
INST_NAME("RCL Ew, Ib");
if (geted_ib(dyn, addr, ninst, nextop) & 31) {
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEW(x1, 1);
u8 = F8;
emit_rcl16c(dyn, ninst, ed, u8, x4, x5);
@ -990,7 +990,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
INST_NAME("RCR Ew, Ib");
if (geted_ib(dyn, addr, ninst, nextop) & 31) {
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEW(x1, 1);
u8 = F8;
emit_rcr16c(dyn, ninst, ed, u8, x4, x5);
@ -1080,7 +1080,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
case 2:
INST_NAME("RCL Ew, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEW(x1, 0);
emit_rcl16c(dyn, ninst, x1, 1, x5, x4);
EWBACK;
@ -1088,7 +1088,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
case 3:
INST_NAME("RCR Ew, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
GETEW(x1, 0);
emit_rcr16c(dyn, ninst, x1, 1, x5, x4);
EWBACK;

View File

@ -991,14 +991,8 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
if (!(c%9)) return;
IFX(X_PEND) {
MOV32w(s3, c);
STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_rcl8);
} else IFX(X_ALL) {
SET_DFNONE(s4);
}
SET_DFNONE(s4);
c%=9;
BFIw(s1, xFlags, 8, 1); // insert cf
IFX(X_OF|X_CF) {
@ -1033,14 +1027,8 @@ void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
if (!(c%9)) return;
IFX(X_PEND) {
MOV32w(s3, c);
STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_rcr8);
} else IFX(X_ALL) {
SET_DFNONE(s4);
}
SET_DFNONE(s4);
c%=9;
IFX(X_OF) {
MOVw_REG(s3, wFlags);
@ -1071,14 +1059,8 @@ void emit_rcl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
if (!(c%17)) return;
IFX(X_PEND) {
MOV32w(s3, c);
STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_rcl16);
} else IFX(X_ALL) {
SET_DFNONE(s4);
}
SET_DFNONE(s4);
c%=17;
BFIw(s1, xFlags, 16, 1); // insert cf
IFX(X_OF|X_CF) {
@ -1111,14 +1093,8 @@ void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
if (!(c%17)) return;
IFX(X_PEND) {
MOV32w(s3, c);
STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_rcr16);
} else IFX(X_ALL) {
SET_DFNONE(s4);
}
SET_DFNONE(s4);
c%=17;
BFIw(s1, xFlags, 16, 1); // insert cf
IFX(X_OF) {
@ -1141,6 +1117,70 @@ void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
}
}
// emit RCL32/RCL64 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
// rex.w selects the 64-bit form, otherwise the 32-bit form is emitted.
// c is expected to be already masked by the caller (below the operand width);
// c==0 emits nothing, in line with the 8/16-bit constant emitters of this file.
void emit_rcl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);

    // A rotate by 0 changes neither the operand nor the flags, and the shift
    // amounts computed below (size-c) would be out of range: emit nothing.
    if(!c) return;

    SET_DFNONE(s4);
    IFX(X_OF|X_CF) {
        // s3 bit 0 = source bit (size-c), i.e. the last bit rotated out (new CF)
        LSRxw_IMM(s3, s1, (rex.w?64:32)-c);
    }
    if(c==1) {
        // no source bit wraps around past the carry; (size+1-c) would also equal
        // the operand width, so the generic sequence below cannot be used
        LSLxw(s1, s1, 1);
        BFIxw(s1, xFlags, 0, 1);    // low bit of xFlags (incoming carry, presumably F_CF is bit 0) enters at bit 0
    } else {
        LSLxw(s4, s1, c);
        BFIxw(s4, xFlags, c-1, 1);  // incoming carry lands just below the shifted-up bits
        ORRxw_REG_LSR(s1, s4, s1, (rex.w?65:33)-c); // top c-1 source bits wrap around to the bottom
    }
    IFX(X_PEND) {
        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_CF) {
        BFIw(xFlags, s3, F_CF, 1);
    }
    IFX(X_OF) {
        // OF is only updated for a count of 1: new CF (s3 bit 0) ^ top bit of the result
        if(c==1) {
            EORxw_REG_LSR(s3, s3, s1, rex.w?63:31);
            BFIw(xFlags, s3, F_OF, 1);
        }
    }
}
// emit RCR32/RCR64 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
// rex.w selects the 64-bit form, otherwise the 32-bit form is emitted.
// c is expected to be already masked by the caller (below the operand width);
// c==0 emits nothing, in line with the 8/16-bit constant emitters of this file.
void emit_rcr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);

    // A rotate by 0 changes neither the operand nor the flags; it would also
    // make c-1 wrap around (c is unsigned) and size-c be out of range: emit nothing.
    if(!c) return;

    SET_DFNONE(s4);
    IFX(X_OF) {
        // OF is only updated for a count of 1, and must be computed before the rotate:
        // low bit of xFlags (incoming carry) ^ top bit of the source
        if(c==1) {
            EORxw_REG_LSR(s3, xFlags, s1, rex.w?63:31);
            BFIw(xFlags, s3, F_OF, 1);
        }
    }
    IFX(X_CF) {
        // save source bit (c-1), the last bit rotated out, before s1 is overwritten
        BFXILxw(s3, s1, c-1, 1);
    }
    if(c==1) {
        // no source bit wraps around past the carry; (size+1-c) would also equal
        // the operand width, so the generic sequence below cannot be used
        LSRxw(s1, s1, 1);
        BFIxw(s1, xFlags, rex.w?63:31, 1);  // incoming carry enters at the top bit
    } else {
        LSRxw(s4, s1, c);
        BFIxw(s4, xFlags, (rex.w?64:32)-c, 1);  // incoming carry lands just above the shifted-down bits
        ORRxw_REG_LSL(s1, s4, s1, (rex.w?65:33)-c); // low c-1 source bits wrap around to the top
    }
    IFX(X_CF) {
        // write the saved bit back as the new carry (bit 0 of the flags register)
        BFIw(wFlags, s3, 0, 1);
    }
    IFX(X_PEND) {
        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
    }
}
// emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
{

View File

@ -1102,6 +1102,8 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
#define emit_rcr8c STEPNAME(emit_rcr8c)
#define emit_rcl16c STEPNAME(emit_rcl16c)
#define emit_rcr16c STEPNAME(emit_rcr16c)
#define emit_rcl32c STEPNAME(emit_rcl32c)
#define emit_rcr32c STEPNAME(emit_rcr32c)
#define emit_shrd32c STEPNAME(emit_shrd32c)
#define emit_shrd32 STEPNAME(emit_shrd32)
#define emit_shld32c STEPNAME(emit_shld32c)
@ -1259,6 +1261,8 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
void emit_rcl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
void emit_rcl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_rcr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);

View File

@ -1051,37 +1051,7 @@ void UpdateFlags(x64emu_t *emu)
}
CONDITIONAL_SET_FLAG(emu->res.u64 & (1L << 63), F_CF);
break;
case d_rcl8:
cnt = emu->op2.u8%9;
CONDITIONAL_SET_FLAG(emu->op1.u8>>(9-cnt) & 1, F_CF);
// should for cnt==1
CONDITIONAL_SET_FLAG(((emu->res.u8>>7) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
break;
case d_rcr8:
cnt = emu->op2.u8%9;
// should for cnt==1, using "before" CF
CONDITIONAL_SET_FLAG(((emu->res.u8>>7) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
// new CF
CONDITIONAL_SET_FLAG(((cnt==1)?emu->op1.u8:(emu->op1.u8>>(cnt-1))) & 1, F_CF);
break;
case d_rcl16:
cnt = emu->op2.u16%17;
CONDITIONAL_SET_FLAG(emu->op1.u16>>(17-cnt) & 1, F_CF);
// should for cnt==1
CONDITIONAL_SET_FLAG(((emu->res.u16>>15) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
break;
case d_rcr16:
cnt = emu->op2.u16%17;
// should for cnt==1, using "before" CF
CONDITIONAL_SET_FLAG(((emu->res.u16>>15) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
// new CF
CONDITIONAL_SET_FLAG(((cnt==1)?emu->op1.u16:(emu->op1.u16>>(cnt-1))) & 1, F_CF);
break;
case d_rcl32:
case d_rcl64:
case d_rcr32:
case d_rcr64:
case d_unknown:
printf_log(LOG_NONE, "Box64: %p trying to evaluate Unknown deferred Flags\n", (void*)R_RIP);
break;

View File

@ -119,14 +119,6 @@ typedef enum {
d_ror16,
d_ror32,
d_ror64,
d_rcl8,
d_rcl16,
d_rcl32,
d_rcl64,
d_rcr8,
d_rcr16,
d_rcr32,
d_rcr64,
d_dec8i, // interpreter version, to handle the CF flags that is untouched
d_dec16i,
d_dec32i,