Bug 580515 - TM: LIR_cmovd mishandled with X86_FORCE_SSE2=no. r=edwsmith.

--HG--
extra : convert_revision : 4effe362e918583ec7b98b08da24f02c0833d306
Nicholas Nethercote 2010-12-01 14:23:44 -08:00
parent 884504296d
commit 18402713d9
4 changed files with 142 additions and 108 deletions


@@ -1173,20 +1173,24 @@ namespace nanojit
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
if (ins->isop(LIR_cmovd)) {
// See Nativei386.cpp:asm_cmov() for an explanation of the subtleties here.
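// In short: there is no conditional move between XMM registers, so the
// result is selected with a branch around a copy. Only the jcc is emitted
// here via asm_branch_helper(); the flag-setting compare comes from the
// asm_cmp(cond) call at the end of this block, so any register evictions it
// triggers land before the whole sequence rather than between the copies
// and the jcc.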
NIns* target = _nIns;
asm_nongp_copy(rr, rf);
asm_branch(false, cond, target);
asm_branch_helper(false, cond, target);
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
if (rr != rt)
asm_nongp_copy(rr, rt);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(cond);
return;
}
@@ -1194,8 +1198,8 @@ namespace nanojit
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
// WARNING: We cannot generate any code that affects the condition
// codes between the MRcc generation here and the asm_cmp() call
// below. See asm_cmp() for more details.
// codes between the MRcc generation here and the asm_cmpi() call
// below. See asm_cmpi() for more details.
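// Because code is assembled backwards, the test/cmp emitted by asm_cmpi()
// below lands ahead of the cmovcc in the executed stream, and anything
// emitted in between (typically register restores) ends up between the two.
// A rough forward-order sketch, with purely illustrative registers:
//   cmp    r8d, r9d        ; emitted by asm_cmpi(cond)
//   mov    rax, rcx        ; rr := rt (flags-safe)
//   cmovne rax, rdx        ; rr := rf when the condition does not hold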
LOpcode condop = cond->opcode();
if (ins->isop(LIR_cmovi)) {
switch (condop) {
@@ -1234,30 +1238,36 @@ namespace nanojit
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(cond);
asm_cmpi(cond);
}
NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) {
NanoAssert(cond->isCmp());
LOpcode condop = cond->opcode();
NIns* Assembler::asm_branch(bool onFalse, LIns* cond, NIns* target) {
NIns* patch = asm_branch_helper(onFalse, cond, target);
asm_cmp(cond);
return patch;
}
NIns* Assembler::asm_branch_helper(bool onFalse, LIns *cond, NIns *target) {
if (target && !isTargetWithinS32(target)) {
// conditional jumps beyond 32bit range, so invert the branch/compare
// and emit an unconditional jump to the target
// A conditional jump beyond 32-bit range, so invert the
// branch/compare and emit an unconditional jump to the target:
// j(inverted) B1
// jmp target
// B1:
NIns* shortTarget = _nIns;
JMP(target);
target = shortTarget;
onFalse = !onFalse;
}
if (isCmpDOpcode(condop))
return asm_branchd(onFalse, cond, target);
return isCmpDOpcode(cond->opcode())
? asm_branchd_helper(onFalse, cond, target)
: asm_branchi_helper(onFalse, cond, target);
}
NIns* Assembler::asm_branchi_helper(bool onFalse, LIns *cond, NIns *target) {
// We must ensure there's room for the instruction before calculating
// the offset. And the offset determines the opcode (8bit or 32bit).
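// For example, the short form "je rel8" encodes in 2 bytes while the near
// form "je rel32" (0F 84 + imm32) takes 6, so the displacement computed
// from _nIns is only meaningful once space for the chosen encoding has been
// reserved.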
LOpcode condop = cond->opcode();
if (target && isTargetWithinS8(target)) {
if (onFalse) {
switch (condop) {
@@ -1315,9 +1325,7 @@ namespace nanojit
}
}
}
NIns *patch = _nIns; // address of instruction to patch
asm_cmp(cond);
return patch;
return _nIns; // address of instruction to patch
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
@@ -1334,13 +1342,17 @@ namespace nanojit
return _nIns;
}
void Assembler::asm_cmp(LIns *cond) {
isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
}
// WARNING: this function cannot generate code that will affect the
// condition codes prior to the generation of the test/cmp. See
// Nativei386.cpp:asm_cmp() for details.
void Assembler::asm_cmp(LIns *cond) {
// Nativei386.cpp:asm_cmpi() for details.
void Assembler::asm_cmpi(LIns *cond) {
LIns *b = cond->oprnd2();
if (isImm32(b)) {
asm_cmp_imm(cond);
asm_cmpi_imm(cond);
return;
}
LIns *a = cond->oprnd1();
@@ -1361,7 +1373,7 @@ namespace nanojit
}
}
void Assembler::asm_cmp_imm(LIns *cond) {
void Assembler::asm_cmpi_imm(LIns *cond) {
LOpcode condop = cond->opcode();
LIns *a = cond->oprnd1();
LIns *b = cond->oprnd2();
@@ -1399,11 +1411,9 @@ namespace nanojit
// LIR_jt jae ja swap+jae swap+ja jp over je
// LIR_jf jb jbe swap+jb swap+jbe jne+jp
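// ucomisd sets the flags the way an unsigned integer compare would: ZF=1 on
// equal, CF=1 on less-than, and ZF=PF=CF=1 when either operand is NaN --
// which is why the unsigned jcc forms and the extra parity checks above are
// used.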
NIns* Assembler::asm_branchd(bool onFalse, LIns *cond, NIns *target) {
NIns* Assembler::asm_branchd_helper(bool onFalse, LIns *cond, NIns *target) {
LOpcode condop = cond->opcode();
NIns *patch;
LIns *a = cond->oprnd1();
LIns *b = cond->oprnd2();
if (condop == LIR_eqd) {
if (onFalse) {
// branch if unordered or !=
@@ -1422,34 +1432,23 @@ namespace nanojit
}
}
else {
if (condop == LIR_ltd) {
condop = LIR_gtd;
LIns *t = a; a = b; b = t;
} else if (condop == LIR_led) {
condop = LIR_ged;
LIns *t = a; a = b; b = t;
}
if (condop == LIR_gtd) {
if (onFalse)
JBE(8, target);
else
JA(8, target);
} else { // LIR_ged
if (onFalse)
JB(8, target);
else
JAE(8, target);
// LIR_ltd and LIR_gtd are handled by the same case because
// asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
// LIR_led/LIR_ged.
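// For example, for LIR_ltd(a,b) the ucomisd emitted by asm_cmpd() ends up
// comparing b against a, so JA (taken when b > a) is the right test here
// as well.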
switch (condop) {
case LIR_ltd:
case LIR_gtd: if (onFalse) JBE(8, target); else JA(8, target); break;
case LIR_led:
case LIR_ged: if (onFalse) JB(8, target); else JAE(8, target); break;
default: NanoAssert(0); break;
}
patch = _nIns;
}
asm_cmpd(a, b);
return patch;
}
void Assembler::asm_condd(LIns *ins) {
LOpcode op = ins->opcode();
LIns *a = ins->oprnd1();
LIns *b = ins->oprnd2();
if (op == LIR_eqd) {
// result = ZF & !PF, must do logic on flags
// r = al|bl|cl|dl, can only use rh without rex prefix
@@ -1460,30 +1459,40 @@ namespace nanojit
X86_SETNP(r); // setnp rh rh = !PF
X86_SETE(r); // sete rl rl = ZF
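// Executed order is "sete rl; setnp rh"; combining them (done by code
// outside this hunk) yields ZF & !PF, which is 1 only for an ordered,
// equal compare -- an unordered result has PF=1 and so produces 0.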
} else {
if (op == LIR_ltd) {
op = LIR_gtd;
LIns *t = a; a = b; b = t;
} else if (op == LIR_led) {
op = LIR_ged;
LIns *t = a; a = b; b = t;
}
// LIR_ltd and LIR_gtd are handled by the same case because
// asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
// LIR_led/LIR_ged.
Register r = prepareResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
MOVZX8(r, r);
if (op == LIR_gtd)
SETA(r);
else
SETAE(r);
switch (op) {
case LIR_ltd:
case LIR_gtd: SETA(r); break;
case LIR_led:
case LIR_ged: SETAE(r); break;
default: NanoAssert(0); break;
}
}
freeResourcesOf(ins);
asm_cmpd(a, b);
asm_cmpd(ins);
}
// WARNING: This function cannot generate any code that will affect the
// condition codes prior to the generation of the ucomisd. See asm_cmp()
// condition codes prior to the generation of the ucomisd. See asm_cmpi()
// for more details.
void Assembler::asm_cmpd(LIns *a, LIns *b) {
void Assembler::asm_cmpd(LIns *cond) {
LOpcode opcode = cond->opcode();
LIns* a = cond->oprnd1();
LIns* b = cond->oprnd2();
// First, we convert (a < b) into (b > a), and (a <= b) into (b >= a).
if (opcode == LIR_ltd) {
opcode = LIR_gtd;
LIns* t = a; a = b; b = t;
} else if (opcode == LIR_led) {
opcode = LIR_ged;
LIns* t = a; a = b; b = t;
}
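// The usual reason for the swap: after "ucomisd x, y" an unordered result
// sets ZF=PF=CF=1, so testing "less than" directly with JB/SETB would also
// succeed on NaN. The swapped gtd/ged forms are tested with JA/JAE (or
// SETA/SETAE), for which a NaN operand simply makes the condition false --
// the behaviour wanted for the LIR comparisons.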
Register ra, rb;
findRegFor2(FpRegs, a, ra, FpRegs, b, rb);
UCOMISD(ra, rb);
@@ -1518,7 +1527,7 @@ namespace nanojit
}
// WARNING: the code generated by this function must not affect the
// condition codes. See asm_cmp() for details.
// condition codes. See asm_cmpi() for details.
void Assembler::asm_restore(LIns *ins, Register r) {
if (ins->isop(LIR_allocp)) {
int d = arDisp(ins);
@@ -1587,7 +1596,7 @@ namespace nanojit
}
freeResourcesOf(ins);
asm_cmp(ins);
asm_cmpi(ins);
}
void Assembler::asm_ret(LIns *ins) {


@@ -423,9 +423,12 @@ namespace nanojit
void endLoadRegs(LIns *ins);\
void dis(NIns *p, int bytes);\
void asm_cmp(LIns*);\
void asm_cmp_imm(LIns*);\
void asm_cmpd(LIns*, LIns*);\
NIns* asm_branchd(bool, LIns*, NIns*);\
void asm_cmpi(LIns*);\
void asm_cmpi_imm(LIns*);\
void asm_cmpd(LIns*);\
NIns* asm_branch_helper(bool, LIns*, NIns*);\
NIns* asm_branchi_helper(bool, LIns*, NIns*);\
NIns* asm_branchd_helper(bool, LIns*, NIns*);\
void asm_div(LIns *ins);\
void asm_div_mod(LIns *ins);\
int max_stk_used;\


@@ -854,8 +854,6 @@ namespace nanojit
inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); }
inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); }
inline void Assembler::FFREE(R r) { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); }
inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
@@ -894,8 +892,6 @@ namespace nanojit
inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); }
inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); }
inline void Assembler::FINCSTP() { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); fpu_pop(); }
inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();}
inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();}
inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); }
@@ -1208,7 +1204,7 @@ namespace nanojit
}
// WARNING: the code generated by this function must not affect the
// condition codes. See asm_cmp().
// condition codes. See asm_cmpi().
void Assembler::asm_restore(LIns* ins, Register r)
{
NanoAssert(ins->getReg() == r);
@@ -1521,19 +1517,18 @@ namespace nanojit
}
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
NIns* Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
{
LOpcode condop = cond->opcode();
NanoAssert(cond->isCmp());
// Handle float conditions separately.
if (isCmpDOpcode(condop)) {
return asm_branchd(branchOnFalse, cond, targ);
}
return isCmpDOpcode(cond->opcode())
? asm_branchd_helper(branchOnFalse, cond, targ)
: asm_branchi_helper(branchOnFalse, cond, targ);
}
NIns* Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
{
if (branchOnFalse) {
// op == LIR_xf/LIR_jf
switch (condop) {
switch (cond->opcode()) {
case LIR_eqi: JNE(targ); break;
case LIR_lti: JNL(targ); break;
case LIR_lei: JNLE(targ); break;
@@ -1547,7 +1542,7 @@ namespace nanojit
}
} else {
// op == LIR_xt/LIR_jt
switch (condop) {
switch (cond->opcode()) {
case LIR_eqi: JE(targ); break;
case LIR_lti: JL(targ); break;
case LIR_lei: JLE(targ); break;
@@ -1560,7 +1555,12 @@ namespace nanojit
default: NanoAssert(0); break;
}
}
NIns* at = _nIns;
return _nIns;
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
{
NIns* at = asm_branch_helper(branchOnFalse, cond, targ);
asm_cmp(cond);
return at;
}
@@ -1584,6 +1584,11 @@ namespace nanojit
JMP_indexed(indexreg, 2, table);
}
void Assembler::asm_cmp(LIns *cond)
{
isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
}
// This generates a 'test' or 'cmp' instruction for a condition, which
// causes the condition codes to be set appropriately. It's used with
// conditional branches, conditional moves, and when generating
@@ -1623,7 +1628,7 @@ namespace nanojit
// asm_restore(), that means that asm_restore() cannot generate code which
// affects the condition codes.
//
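// A rough picture of why (registers and offsets purely illustrative):
// because code is assembled backwards, a restore emitted between the jcc
// and this test/cmp ends up between them when executed:
//   cmp  eax, ebx          ; emitted here by asm_cmpi()
//   mov  ecx, [ebp-8]      ; restore emitted in between -- must not alter flags
//   jne  target            ; emitted earlier, by asm_branchi_helper()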
void Assembler::asm_cmp(LIns *cond)
void Assembler::asm_cmpi(LIns *cond)
{
LIns* lhs = cond->oprnd1();
LIns* rhs = cond->oprnd2();
@@ -1734,7 +1739,7 @@ namespace nanojit
freeResourcesOf(ins);
asm_cmp(ins);
asm_cmpi(ins);
}
// Two example cases for "ins = add lhs, rhs". '*' lines are those
@@ -2051,11 +2056,10 @@ namespace nanojit
(ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) {
// See the SSE2 case below for an explanation of the subtleties here.
debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
NanoAssert(FST0 == rr);
NanoAssert(!iftrue->isInReg() || iftrue->getReg() == FST0);
NanoAssert(!iffalse->isInReg());
NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
NIns* target = _nIns;
@@ -2065,52 +2069,73 @@ namespace nanojit
int df = findMemFor(iffalse);
FLDQ(df, FP);
}
FSTP(FST0); // pop the stack
asm_branch_helper(false, condval, target);
FINCSTP();
// It's not sufficient to merely decrement the FP stack pointer; we have to
// also free FST0, otherwise the load above fails.
FFREE(FST0);
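// In the executed (forward) order this becomes "ffree st(0); fincstp".
// FINCSTP only moves the x87 TOP pointer and leaves the register's tag
// marked valid, so the FLDQ that runs afterwards would overflow the stack;
// freeing ST(0) first makes the pair behave like a plain "fstp st(0)".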
asm_branch(false, condval, target);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg())
findSpecificRegForUnallocated(iftrue, FST0);
asm_cmp(condval);
return;
}
RegisterMask allow = ins->isD() ? XmmRegs : GpRegs;
Register rr = prepareResultReg(ins, allow);
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
if (ins->isop(LIR_cmovd)) {
// The obvious way to handle this is as follows:
//
// mov rr, rt # only needed if rt is live afterwards
// do comparison
// jt end
// mov rr, rf
// end:
//
// The problem with this is that doing the comparison can cause
// registers to be evicted, possibly including 'rr', which holds
// 'ins'. And that screws things up. So instead we do this:
//
// do comparison
// mov rr, rt # only needed if rt is live afterwards
// jt end
// mov rr, rf
// end:
//
// Putting the 'mov' between the comparison and the jump is ok
// because move instructions don't modify the condition codes.
//
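// With SSE2 that comes out roughly as follows (register names and the
// condition are purely illustrative):
//   ucomisd xmm2, xmm3     ; comparison, emitted last by asm_cmp(condval)
//   movsd   xmm0, xmm1     ; rr := rt (skipped when rr == rt)
//   ja      end            ; from asm_branch_helper()
//   movsd   xmm0, xmm4     ; rr := rf
// end: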
NIns* target = _nIns;
asm_nongp_copy(rr, rf);
asm_branch(false, condval, target);
asm_branch_helper(false, condval, target);
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
if (rr != rt)
asm_nongp_copy(rr, rt);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(condval);
return;
}
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
NanoAssert(ins->isop(LIR_cmovi));
// WARNING: We cannot generate any code that affects the condition
// codes between the MRcc generation here and the asm_cmp() call
// below. See asm_cmp() for more details.
// codes between the MRcc generation here and the asm_cmpi() call
// below. See asm_cmpi() for more details.
switch (condval->opcode()) {
// Note that these are all opposites...
case LIR_eqi: MRNE(rr, rf); break;
@@ -2128,6 +2153,7 @@ namespace nanojit
if (rr != rt)
MR(rr, rt);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
@@ -2614,7 +2640,7 @@ namespace nanojit
}
}
NIns* Assembler::asm_branchd(bool branchOnFalse, LIns *cond, NIns *targ)
NIns* Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
{
NIns* at = 0;
LOpcode opcode = cond->opcode();
@@ -2673,14 +2699,13 @@ namespace nanojit
if (!at)
at = _nIns;
asm_cmpd(cond);
return at;
}
// WARNING: This function cannot generate any code that will affect the
// condition codes prior to the generation of the
// ucomisd/fcompp/fcmop/fcom. See asm_cmp() for more details.
// ucomisd/fcompp/fcomp/fcom. See asm_cmpi() for more details.
void Assembler::asm_cmpd(LIns *cond)
{
LOpcode condop = cond->opcode();
@@ -2699,14 +2724,13 @@ namespace nanojit
LIns* t = lhs; lhs = rhs; rhs = t;
}
// LIR_eqd, if lhs == rhs:
// ucomisd ZPC outcome (SETNP/JNP succeeds if P==0)
// ------- --- -------
// UNORDERED 111 SETNP/JNP fails
// EQUAL 100 SETNP/JNP succeeds
//
// LIR_eqd, if lsh != rhs;
// LIR_eqd, if lhs != rhs;
// ucomisd ZPC outcome (SETP/JP succeeds if P==1,
// SETE/JE succeeds if Z==1)
// ------- --- -------
@@ -2810,13 +2834,10 @@ namespace nanojit
} else {
TEST_AH(mask);
FNSTSW_AX(); // requires rEAX to be free
if (rhs->isImmD())
{
if (rhs->isImmD()) {
const uint64_t* p = findImmDFromPool(rhs->immDasQ());
FCOMdm(pop, (const double*)p);
}
else
{
} else {
int d = findMemFor(rhs);
FCOM(pop, d, FP);
}


@@ -199,9 +199,12 @@ namespace nanojit
void asm_farg(LIns*, int32_t& stkd);\
void asm_arg(ArgType ty, LIns* p, Register r, int32_t& stkd);\
void asm_pusharg(LIns*);\
void asm_cmpd(LIns *cond);\
NIns* asm_branchd(bool, LIns*, NIns*);\
void asm_cmp(LIns *cond); \
void asm_cmpi(LIns *cond); \
void asm_cmpd(LIns *cond);\
NIns* asm_branch_helper(bool, LIns* cond, NIns*);\
NIns* asm_branchi_helper(bool, LIns* cond, NIns*);\
NIns* asm_branchd_helper(bool, LIns* cond, NIns*);\
void asm_div_mod(LIns *cond); \
void asm_load(int d, Register r); \
void asm_immd(Register r, uint64_t q, double d, bool canClobberCCs); \
@@ -429,7 +432,6 @@ namespace nanojit
void FCHS(); \
void FLD1(); \
void FLDZ(); \
void FFREE(Register r); \
void FST32(bool p, int32_t d, Register b); \
void FSTQ(bool p, int32_t d, Register b); \
void FSTPQ(int32_t d, Register b); \
@@ -451,7 +453,6 @@ namespace nanojit
void FSUBRdm(const double* dm); \
void FMULdm( const double* dm); \
void FDIVRdm(const double* dm); \
void FINCSTP(); \
void FSTP(Register r) { \
count_fpu(); \
FPU(0xddd8, r); \