[LA64_DYNAREC] Added more opcode and fixes (#1423)

* Fixed F2 0F MOVSD opcode
* Added F3 0F 6F MOVDQU opcode
* Added F2,F3 A5 MOVSD opcode
* Added 80 /5 SUB opcode
* Fixed a typo in .clang-format
* Added 66 85 TEST opcode and a fix
2025-02-18 21:42:17 +00:00 · 2024-04-06 17:38:04 +08:00 · 2024-04-06 17:38:04 +08:00 · 89f0c494e4
commit 89f0c494e4
parent 24f9263fa4
7 changed files with 118 additions and 8 deletions
--- a/.clang-format
+++ b/.clang-format
@ -12,6 +12,6 @@ MaxEmptyLinesToKeep: 2
 IndentCaseLabels: true
 AlignConsecutiveMacros: true
 WhitespaceSensitiveMacros: ['QUOTE']
-IfMacros: ['IFX', 'IFX2', 'IFXA', 'IF_PEND0R0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
+IfMacros: ['IFX', 'IFX2', 'IFXA', 'IFX_PENDOR0', 'IFXX', 'IFX2X', 'IFXN', 'UFLAG_IF', 'PASS2IF']
 UseTab: Never
 ---
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@ -463,6 +463,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                    emit_and8c(dyn, ninst, x1, u8, x2, x4);
                    EBBACK();
                    break;
+                case 5: // SUB
+                    INST_NAME("SUB Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEB(x1, 1);
+                    u8 = F8; // presumably fetches the Ib immediate byte from the instruction stream
+                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); // x1 is the operand, u8 the constant; x2, x4, x5, x6 are extra registers (presumably scratch)
+                    EBBACK(); // presumably writes the result back to Eb, as in the AND case just above
+                    break;
                case 7: // CMP
                    INST_NAME("CMP Eb, Ib");
                    SETFLAGS(X_ALL, SF_SET_PENDING);
@ -690,6 +698,37 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
            LD_BU(x2, x1, 0);
            BSTRINS_D(xRAX, x2, 7, 0);
            break;
+        case 0xA5: // string move from [RSI] to [RDI]; the step is 8 bytes when rex.w is set, 4 otherwise
+            if (rep) {
+                INST_NAME("REP MOVSD");
+                CBZ_NEXT(xRCX); // presumably: RCX == 0 means nothing to copy, go straight to the next instruction
+                ANDI(x1, xFlags, 1 << F_DF); // isolate the direction flag bit
+                BNEZ_MARK2(x1); // DF set: use the decrementing loop at MARK2
+                MARK; // Part with DF==0
+                LDxw(x1, xRSI, 0); // load one element from [RSI] (access width presumably follows rex.w)
+                SDxw(x1, xRDI, 0); // store it to [RDI]
+                ADDI_D(xRSI, xRSI, rex.w ? 8 : 4); // advance both pointers by the element size
+                ADDI_D(xRDI, xRDI, rex.w ? 8 : 4);
+                ADDI_D(xRCX, xRCX, -1); // one element done
+                BNEZ_MARK(xRCX); // loop back to MARK while RCX != 0
+                B_NEXT_nocond; // finished: jump over the DF==1 loop below
+                MARK2; // Part with DF==1
+                LDxw(x1, xRSI, 0); // same copy as above
+                SDxw(x1, xRDI, 0);
+                ADDI_D(xRSI, xRSI, rex.w ? -8 : -4); // move both pointers backwards by the element size
+                ADDI_D(xRDI, xRDI, rex.w ? -8 : -4);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX); // loop back to MARK2 while RCX != 0
+                // done
+            } else {
+                INST_NAME("MOVSD");
+                GETDIR(x3, x1, rex.w ? 8 : 4); // presumably x3 = +size or -size depending on DF, with x1 as scratch — TODO confirm
+                LDxw(x1, xRSI, 0); // single element copy from [RSI] to [RDI]
+                SDxw(x1, xRDI, 0);
+                ADD_D(xRSI, xRSI, x3); // step both pointers by x3
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
        case 0xA6:
            switch (rep) {
                case 1:
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@ -91,6 +91,14 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                    DEFAULT;
            }
            break;
+        case 0x85:
+            INST_NAME("TEST Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEW(x1, 0);
+            GETGW(x2); // x2 = low 16 bits of the Gw register, zero-extended; gd is set to x2 as well
+            emit_test16(dyn, ninst, x1, x2, x3, x4, x5); // x3, x4, x5 are scratch; no write-back follows, TEST only affects flags
+            break;
        case 0x89:
            INST_NAME("MOV Ew, Gw");
            nextop = F8;
--- a/src/dynarec/la64/dynarec_la64_emit_tests.c
+++ b/src/dynarec/la64/dynarec_la64_emit_tests.c
@ -132,7 +132,7 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
-        NOR(s5, s5, s1);
+        NOR(s5, xZR, s1);
    }

    // It's a cmp, we can't store the result back to s1.
@ -329,6 +329,47 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
    }
 }

+// emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch (LBT path when la64_lbt is set, otherwise SF/ZF/PF are built by hand in xFlags)
+void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX_PENDOR0 { // NOTE(review): presumably the pending/deferred-flags case — tag the operation as d_tst16
+        SET_DF(s3, d_tst16);
+    } else {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // LBT path: everything is handled in this block, then return early
+        IFX (X_ALL) {
+            X64_AND_H(s1, s2); // presumably a 16-bit LBT AND that only updates the x64 flags — TODO confirm
+        }
+
+        IFX_PENDOR0 {
+            AND(s3, s1, s2); // result is only needed for the store below
+            ST_H(s3, xEmu, offsetof(x64emu_t, res)); // halfword store of the result into x64emu_t.res
+        }
+        return; // skip the manual flag computation below
+    }
+    CLEAR_FLAGS(s3);
+    AND(s3, s1, s2); // res = s1 & s2
+
+    IFX_PENDOR0 {
+        ST_H(s3, xEmu, offsetof(x64emu_t, res)); // halfword store of the result into x64emu_t.res
+    }
+
+    IFX (X_SF) {
+        SRLI_D(s4, s3, 15); // s4 = res >> 15; equals bit 15 only if bits above 15 of s3 are clear — TODO confirm for all callers
+        BEQZ(s4, 8); // offset 8: presumably skips the next instruction (the ORI) when s4 == 0
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s3, 8); // same skip pattern: ZF is only set when res == 0
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s3, s4, s5); // parity flag is delegated to the emit_pf helper (defined elsewhere)
+    }
+}
+
 // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@ -68,9 +68,9 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            GETG;
            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
            if(MODREG) {
-                ed = (nextop&7)+ (rex.b<<3);
+                ed = (nextop & 7) + (rex.b << 3);
                d0 = sse_get_reg(dyn, ninst, x1, ed, 0);
-                FMOV_D(d0, v0);
+                VEXTRINS_D(d0, v0, 0); // d0[63:0] = v0[63:0]
            } else {
                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
                FST_D(v0, ed, fixedaddress);
@ -85,7 +85,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            GETEXSD(v1, 0);
            d0 = fpu_get_scratch(dyn);
            FADD_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
            break;
        case 0x59:
            INST_NAME("MULSD Gx, Ex");
@ -95,7 +95,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            GETEXSD(v1, 0);
            d0 = fpu_get_scratch(dyn);
            FMUL_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
            break;
        case 0x5C:
            INST_NAME("SUBSD Gx, Ex");
@ -105,7 +105,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            GETEXSD(v1, 0);
            d0 = fpu_get_scratch(dyn);
            FSUB_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
            break;
        case 0x5E:
            INST_NAME("DIVSD Gx, Ex");
@ -115,7 +115,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            GETEXSD(v1, 0);
            d0 = fpu_get_scratch(dyn);
            FDIV_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
            break;
        default:
            DEFAULT;
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@ -53,6 +53,20 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
            nextop = F8;
            FAKEED;
            break;
+        case 0x6F:
+            INST_NAME("MOVDQU Gx, Ex");
+            nextop = F8;
+            if (MODREG) { // register source
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); // source register index: low 3 bits of nextop plus rex.b as bit 3
+                GETGX_empty(v0);
+                VOR_V(v0, v1, v1); // v1 | v1 == v1, so this copies v1 into v0
+            } else { // memory source
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); // resolve the memory operand into ed / fixedaddress
+                VLD(v0, ed, fixedaddress); // vector load from that address into v0
+            }
+            break;
        case 0x7E:
            INST_NAME("MOVQ Gx, Ex");
            nextop = F8;
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@ -83,6 +83,12 @@
 // GETGD    get x64 register in gd
 #define GETGD gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3));

+// GETGW: decode the register from the reg field of nextop (plus rex.r), put its low 16 bits, zero-extended, in register i, and leave gd = i
+#define GETGW(i)                                         \
+    gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+    BSTRPICK_D(i, gd, 15, 0);                            \
+    gd = i;
+
 // GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETED(D)                                                                                \
    if (MODREG) {                                                                               \
@ -606,6 +612,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_cmp8           STEPNAME(emit_cmp8)
 #define emit_cmp8_0         STEPNAME(emit_cmp8_0)
 #define emit_test8          STEPNAME(emit_test8)
+#define emit_test16         STEPNAME(emit_test16)
 #define emit_test32         STEPNAME(emit_test32)
 #define emit_test32c        STEPNAME(emit_test32c)
 #define emit_add32          STEPNAME(emit_add32)
@ -672,6 +679,7 @@ void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
 void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
 void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);