From 2515096f5674a91c1fdf3a6e197af38078c203a9 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 13 Nov 2024 17:34:21 +0100 Subject: [PATCH] [ARM64_DYNAREC] Reworked 8/16/32/64 OR opcodes --- src/dynarec/arm64/dynarec_arm64_66.c | 6 +- src/dynarec/arm64/dynarec_arm64_6664.c | 3 +- src/dynarec/arm64/dynarec_arm64_66f0.c | 18 +++- src/dynarec/arm64/dynarec_arm64_67.c | 3 +- src/dynarec/arm64/dynarec_arm64_emit_logic.c | 106 ++++++++----------- src/dynarec/arm64/dynarec_arm64_helper.h | 2 +- 6 files changed, 62 insertions(+), 76 deletions(-) diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index f9acee68..16701d0f 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -119,8 +119,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); i32 = F16; UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_or16(dyn, ninst, x1, x2, x3, x4); + emit_or16c(dyn, ninst, x1, i32, x3, x4); BFIz(xRAX, x1, 0, 16); break; @@ -493,8 +492,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_or16(dyn, ninst, x1, x5, x2, x4); + emit_or16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 2: //ADC diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c index 36e4ae0d..ba86644b 100644 --- a/src/dynarec/arm64/dynarec_arm64_6664.c +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -151,8 +151,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n grab_segdata(dyn, addr, ninst, x1, seg); GETEWO(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_or16(dyn, ninst, x1, x5, x2, x4); + emit_or16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 2: //ADC diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index 6d8f953b..4092bb71 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -327,25 +327,33 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { if(opcode==0x81) i32 = F16S; else i32 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); UXTHw(x6, ed); - emit_or16(dyn, ninst, x6, x5, x3, x4); + emit_or16c(dyn, ninst, x6, i32, x3, x4); BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); + i64 = convert_bitmask_xw(i32); + if(!i64) {MOV32w(x5, i32);} if(arm64_atomics) { UFLAG_IF { LDSETALH(x5, x1, wback); - emit_or16(dyn, ninst, x1, x5, x3, x4); + if(i64) { + emit_or16c(dyn, ninst, x1, i32, x3, x4); + } else { + emit_or16(dyn, ninst, x1, x5, x3, x4); + } } else { STSETLH(x5, wback); } } else { MARKLOCK; LDAXRH(x1, wback); - emit_or16(dyn, ninst, x1, x5, x3, x4); + if(i64) { + emit_or16c(dyn, ninst, x1, i32, x3, x4); + } else { + emit_or16(dyn, ninst, x1, x5, x3, x4); + } STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index 4adac1d9..d9f6f70b 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -818,8 +818,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); GETEW32(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_or16(dyn, ninst, x1, x5, x2, x4); + emit_or16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 2: //ADC diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c index f9921d8e..d3d9635d 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -73,18 +73,18 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, // emit OR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { + int mask = convert_bitmask_xw(c); + if(!mask) { + MOV64xw(s3, c); + emit_or32(dyn, ninst, rex, s1, s3, s3, s4); + return; + } IFX(X_PEND) { SET_DF(s4, rex.w?d_or64:d_or32); } else IFX(X_ALL) { SET_DFNONE(s4); } - int mask = convert_bitmask_xw(c); - if(mask) { - ORRxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); - } else { - MOV64xw(s3, c); - ORRxw_REG(s1, s1, s3); - } + ORRxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -331,9 +331,9 @@ void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) { MAYUSE(s2); IFX(X_PEND) { - SET_DF(s3, d_or8); + SET_DF(s4, d_or8); } else IFX(X_ALL) { - SET_DFNONE(s3); + SET_DFNONE(s4); } ORRw_REG(s1, s1, s2); IFX(X_PEND) { @@ -352,18 +352,18 @@ void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit OR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) { + int mask = convert_bitmask_w(c); + if(!mask) { + MOV32w(s3, c); + emit_or8c(dyn, ninst, s1, s3, s3, s4); + return; + } IFX(X_PEND) { SET_DF(s4, d_or8); } else IFX(X_ALL) { SET_DFNONE(s4); } - int mask = convert_bitmask_w(c); - if(mask) { - ORRw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); - } else { - MOV32w(s3, c&0xff); - ORRw_REG(s1, s1, s3); - } + ORRw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); IFX(X_PEND) { STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -518,9 +518,9 @@ void emit_or16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) { MAYUSE(s2); IFX(X_PEND) { - SET_DF(s3, d_or16); + SET_DF(s4, d_or16); } else IFX(X_ALL) { - SET_DFNONE(s3); + SET_DFNONE(s4); } ORRw_REG(s1, s1, s2); IFX(X_PEND) { @@ -537,50 +537,32 @@ void emit_or16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } // emit OR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -//void emit_or16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_or16); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// if(c>=0 && c<256) { -// IFX(X_ALL) { -// ORRS_IMM8(s1, s1, c, 0); -// } else { -// ORR_IMM8(s1, s1, c, 0); -// } -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// IFX(X_ALL) { -// ORRS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// ORR_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<>6)&0x3F); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<