diff --git a/js/src/nanojit/Nativei386.cpp b/js/src/nanojit/Nativei386.cpp index d2897707a9eb..24656729e079 100644 --- a/js/src/nanojit/Nativei386.cpp +++ b/js/src/nanojit/Nativei386.cpp @@ -102,7 +102,9 @@ namespace nanojit // Reserve stackNeeded bytes, padded // to preserve NJ_ALIGN_STACK-byte alignment. if (amt) + { SUBi(SP, amt); + } verbose_only( outputAddr=true; asm_output("[frag entry]"); ) NIns *fragEntry = _nIns; @@ -145,6 +147,7 @@ namespace nanojit lr->jmp = _nIns; } } + // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue MR(SP,FP); @@ -155,13 +158,11 @@ namespace nanojit NIns *Assembler::genEpilogue() { RET(); - if (!_thisfrag->lirbuf->explicitSavedRegs) { for (int i = NumSavedRegs - 1; i >= 0; --i) POPr(savedRegs[i]); POPr(FP); // dummy } - POPr(FP); // Restore caller's FP. MR(SP,FP); // pop the stack frame return _nIns; @@ -298,7 +299,7 @@ namespace nanojit asm( "bsf %1, %%eax\n\t" "btr %%eax, %2\n\t" - "movl %%eax, %0\n\t" + "movl %%eax, %0\n\t" : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" ); #endif /* WIN32 */ return r; @@ -393,7 +394,7 @@ namespace nanojit ST(FP, d, r); } - freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem + freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem } void Assembler::asm_load(int d, Register r) @@ -563,13 +564,13 @@ namespace nanojit if (config.sse2) { Register rv = findRegFor(value, XmmRegs); - Register rb; - if (base->isop(LIR_ialloc)) { - rb = FP; - dr += findMemFor(base); - } else { - rb = findRegFor(base, GpRegs); - } + Register rb; + if (base->isop(LIR_ialloc)) { + rb = FP; + dr += findMemFor(base); + } else { + rb = findRegFor(base, GpRegs); + } SSE_STQ(dr, rb, rv); return; } @@ -577,10 +578,10 @@ namespace nanojit int da = findMemFor(value); Register rb; if (base->isop(LIR_ialloc)) { - rb = FP; - dr += findMemFor(base); + rb = FP; + dr += findMemFor(base); } else { - rb = findRegFor(base, GpRegs); + rb = findRegFor(base, GpRegs); } asm_mmq(rb, dr, FP, da); return; @@ -645,12 +646,10 @@ namespace nanojit NIns* at = 0; LOpcode condop = cond->opcode(); NanoAssert(cond->isCond()); - if (condop >= LIR_feq && condop <= LIR_fge) { return asm_jmpcc(branchOnFalse, cond, targ); } - // produce the branch if (branchOnFalse) { @@ -698,7 +697,6 @@ namespace nanojit else //if (condop == LIR_uge) JAE(targ, isfar); } - at = _nIns; asm_cmp(cond); return at; @@ -709,14 +707,14 @@ namespace nanojit LIns* diff = ins->oprnd1(); findSpecificRegFor(diff, EBX); JMP(exit); - } + } void Assembler::asm_cmp(LIns *cond) { LOpcode condop = cond->opcode(); // LIR_ov recycles the flags set by arithmetic ops - if ((condop == LIR_ov)) + if (condop == LIR_ov) return; LInsp lhs = cond->oprnd1(); @@ -866,7 +864,6 @@ namespace nanojit Register rr = prepResultReg(ins, allow); Reservation* rA = getresv(lhs); Register ra; - // if this is last use of lhs in reg, we can re-use result reg if (rA == 0 || (ra = rA->reg) == UnknownReg) ra = findSpecificRegFor(lhs, rr); @@ -1074,17 +1071,17 @@ namespace nanojit switch (condval->opcode()) { // note that these are all opposites... - case LIR_eq: MRNE(rr, iffalsereg); break; + case LIR_eq: MRNE(rr, iffalsereg); break; case LIR_ov: MRNO(rr, iffalsereg); break; - case LIR_lt: MRGE(rr, iffalsereg); break; + case LIR_lt: MRGE(rr, iffalsereg); break; case LIR_le: MRG(rr, iffalsereg); break; - case LIR_gt: MRLE(rr, iffalsereg); break; + case LIR_gt: MRLE(rr, iffalsereg); break; case LIR_ge: MRL(rr, iffalsereg); break; - case LIR_ult: MRAE(rr, iffalsereg); break; - case LIR_ule: MRA(rr, iffalsereg); break; - case LIR_ugt: MRBE(rr, iffalsereg); break; - case LIR_uge: MRB(rr, iffalsereg); break; - default: debug_only( NanoAssert(0); ) break; + case LIR_ult: MRAE(rr, iffalsereg); break; + case LIR_ule: MRA(rr, iffalsereg); break; + case LIR_ugt: MRBE(rr, iffalsereg); break; + case LIR_uge: MRB(rr, iffalsereg); break; + debug_only( default: NanoAssert(0); break; ) } } else if (op == LIR_qcmov) { NanoAssert(0); @@ -1489,12 +1486,12 @@ namespace nanojit // // gr is 0..2^32-1 // - // sub gr,0x80000000 + // sub gr,0x80000000 // // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value // as before // - // cvtsi2sd rr,gr + // cvtsi2sd rr,gr // // rr is now a double with the int value range // @@ -1530,8 +1527,8 @@ namespace nanojit Register gr = findRegFor(ins->oprnd1(), GpRegs); NanoAssert(rr == FST0); FILDQ(disp, base); - STi(base, disp+4, 0); // high 32 bits = 0 - ST(base, disp, gr); // low 32 bits = unsigned value + STi(base, disp+4, 0); // high 32 bits = 0 + ST(base, disp, gr); // low 32 bits = unsigned value } } @@ -1663,7 +1660,6 @@ namespace nanojit evict(EAX); TEST_AH(mask); LAHF(); - Reservation *rA, *rB; findRegFor2(XmmRegs, lhs, rA, rhs, rB); SSE_UCOMISD(rA->reg, rB->reg); diff --git a/js/src/nanojit/Nativei386.h b/js/src/nanojit/Nativei386.h index 46d32a37b2ae..33bb5950e9d4 100644 --- a/js/src/nanojit/Nativei386.h +++ b/js/src/nanojit/Nativei386.h @@ -90,7 +90,7 @@ namespace nanojit { - const int NJ_LOG2_PAGE_SIZE = 12; // 4K + const int NJ_LOG2_PAGE_SIZE = 12; // 4K const int NJ_MAX_REGISTERS = 24; // gpregs, x87 regs, xmm regs const int NJ_STACK_OFFSET = 0; @@ -160,14 +160,14 @@ namespace nanojit static const RegisterMask AllowableFlagRegs = 1<>3),(r)); \ *(--_nIns) = uint8_t(0x83); \ - } else { \ - IMM32(i); \ - if ( (r) == EAX) { \ - *(--_nIns) = (uint8_t) (c); \ - } else { \ + } else { \ + IMM32(i); \ + if ( (r) == EAX) { \ + *(--_nIns) = (uint8_t) (c); \ + } else { \ MODRM((c>>3),(r)); \ *(--_nIns) = uint8_t(0x81); \ - } \ - } + } \ + } #define ALUmi(c,d,b,i) \ underrunProtect(10); \ @@ -332,25 +332,25 @@ namespace nanojit #define ALU2(c,d,s) \ underrunProtect(3); \ - MODRM((d),(s)); \ + MODRM((d),(s)); \ _nIns -= 2; \ _nIns[0] = (uint8_t) ( ((c)>>8) ); \ _nIns[1] = (uint8_t) ( (c) ) -#define LAHF() do { count_alu(); ALU0(0x9F); asm_output("lahf"); } while(0) -#define SAHF() do { count_alu(); ALU0(0x9E); asm_output("sahf"); } while(0) -#define OR(l,r) do { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } while(0) -#define AND(l,r) do { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } while(0) -#define XOR(l,r) do { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } while(0) -#define ADD(l,r) do { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } while(0) -#define SUB(l,r) do { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } while(0) -#define MUL(l,r) do { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } while(0) +#define LAHF() do { count_alu(); ALU0(0x9F); asm_output("lahf"); } while(0) +#define SAHF() do { count_alu(); ALU0(0x9E); asm_output("sahf"); } while(0) +#define OR(l,r) do { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } while(0) +#define AND(l,r) do { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } while(0) +#define XOR(l,r) do { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } while(0) +#define ADD(l,r) do { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } while(0) +#define SUB(l,r) do { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } while(0) +#define MUL(l,r) do { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } while(0) #define DIV(r) do { count_alu(); ALU(0xf7, (Register)7,(r)); asm_output("idiv edx:eax, %s",gpn(r)); } while(0) -#define NOT(r) do { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } while(0) -#define NEG(r) do { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } while(0) -#define SHR(r,s) do { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } while(0) -#define SAR(r,s) do { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } while(0) -#define SHL(r,s) do { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } while(0) +#define NOT(r) do { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } while(0) +#define NEG(r) do { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } while(0) +#define SHR(r,s) do { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } while(0) +#define SAR(r,s) do { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } while(0) +#define SHL(r,s) do { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } while(0) #define SHIFT(c,r,i) \ underrunProtect(3);\ @@ -358,64 +358,64 @@ namespace nanojit MODRM((Register)c,r);\ *--_nIns = 0xc1; -#define SHLi(r,i) do { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } while(0) -#define SHRi(r,i) do { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } while(0) -#define SARi(r,i) do { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } while(0) +#define SHLi(r,i) do { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } while(0) +#define SHRi(r,i) do { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } while(0) +#define SARi(r,i) do { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } while(0) #define MOVZX8(d,s) do { count_alu(); ALU2(0x0fb6,d,s); asm_output("movzx %s,%s", gpn(d),gpn(s)); } while(0) -#define SUBi(r,i) do { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } while(0) -#define ADDi(r,i) do { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } while(0) -#define ANDi(r,i) do { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } while(0) -#define ORi(r,i) do { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } while(0) -#define XORi(r,i) do { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } while(0) +#define SUBi(r,i) do { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } while(0) +#define ADDi(r,i) do { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } while(0) +#define ANDi(r,i) do { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } while(0) +#define ORi(r,i) do { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } while(0) +#define XORi(r,i) do { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } while(0) -#define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } while(0) +#define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } while(0) -#define TEST(d,s) do { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } while(0) -#define CMP(l,r) do { count_alu(); ALU(0x3b, (l),(r)); asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0) -#define CMPi(r,i) do { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } while(0) +#define TEST(d,s) do { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } while(0) +#define CMP(l,r) do { count_alu(); ALU(0x3b, (l),(r)); asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0) +#define CMPi(r,i) do { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } while(0) -#define MR(d,s) do { count_mov(); ALU(0x8b,d,s); asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0) -#define LEA(r,d,b) do { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0) +#define MR(d,s) do { count_mov(); ALU(0x8b,d,s); asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0) +#define LEA(r,d,b) do { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0) // lea %r, d(%i*4) // This addressing mode is not supported by the MODRMSIB macro. #define LEAmi4(r,d,i) do { count_alu(); IMM32(d); *(--_nIns) = (2<<6)|(i<<3)|5; *(--_nIns) = (0<<6)|(r<<3)|4; *(--_nIns) = 0x8d; asm_output("lea %s, %p(%s*4)", gpn(r), (void*)d, gpn(i)); } while(0) #define CDQ() do { SARi(EDX, 31); MR(EDX, EAX); } while(0) -#define SETE(r) do { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s",gpn(r)); } while(0) -#define SETNP(r) do { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } while(0) -#define SETL(r) do { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s",gpn(r)); } while(0) -#define SETLE(r) do { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } while(0) -#define SETG(r) do { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s",gpn(r)); } while(0) -#define SETGE(r) do { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } while(0) -#define SETB(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s",gpn(r)); } while(0) -#define SETBE(r) do { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } while(0) -#define SETA(r) do { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s",gpn(r)); } while(0) -#define SETAE(r) do { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } while(0) -#define SETO(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("seto %s",gpn(r)); } while(0) +#define SETE(r) do { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s",gpn(r)); } while(0) +#define SETNP(r) do { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } while(0) +#define SETL(r) do { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s",gpn(r)); } while(0) +#define SETLE(r) do { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } while(0) +#define SETG(r) do { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s",gpn(r)); } while(0) +#define SETGE(r) do { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } while(0) +#define SETB(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s",gpn(r)); } while(0) +#define SETBE(r) do { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } while(0) +#define SETA(r) do { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s",gpn(r)); } while(0) +#define SETAE(r) do { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } while(0) +#define SETO(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("seto %s",gpn(r)); } while(0) -#define MREQ(dr,sr) do { count_alu(); ALU2(0x0f44,dr,sr); asm_output("cmove %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRNE(dr,sr) do { count_alu(); ALU2(0x0f45,dr,sr); asm_output("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRL(dr,sr) do { count_alu(); ALU2(0x0f4C,dr,sr); asm_output("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRLE(dr,sr) do { count_alu(); ALU2(0x0f4E,dr,sr); asm_output("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRG(dr,sr) do { count_alu(); ALU2(0x0f4F,dr,sr); asm_output("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRGE(dr,sr) do { count_alu(); ALU2(0x0f4D,dr,sr); asm_output("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRB(dr,sr) do { count_alu(); ALU2(0x0f42,dr,sr); asm_output("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRBE(dr,sr) do { count_alu(); ALU2(0x0f46,dr,sr); asm_output("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRA(dr,sr) do { count_alu(); ALU2(0x0f47,dr,sr); asm_output("cmova %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRAE(dr,sr) do { count_alu(); ALU2(0x0f43,dr,sr); asm_output("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0) -#define MRNO(dr,sr) do { count_alu(); ALU2(0x0f41,dr,sr); asm_output("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MREQ(dr,sr) do { count_alu(); ALU2(0x0f44,dr,sr); asm_output("cmove %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRNE(dr,sr) do { count_alu(); ALU2(0x0f45,dr,sr); asm_output("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRL(dr,sr) do { count_alu(); ALU2(0x0f4C,dr,sr); asm_output("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRLE(dr,sr) do { count_alu(); ALU2(0x0f4E,dr,sr); asm_output("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRG(dr,sr) do { count_alu(); ALU2(0x0f4F,dr,sr); asm_output("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRGE(dr,sr) do { count_alu(); ALU2(0x0f4D,dr,sr); asm_output("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRB(dr,sr) do { count_alu(); ALU2(0x0f42,dr,sr); asm_output("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRBE(dr,sr) do { count_alu(); ALU2(0x0f46,dr,sr); asm_output("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRA(dr,sr) do { count_alu(); ALU2(0x0f47,dr,sr); asm_output("cmova %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRAE(dr,sr) do { count_alu(); ALU2(0x0f43,dr,sr); asm_output("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0) +#define MRNO(dr,sr) do { count_alu(); ALU2(0x0f41,dr,sr); asm_output("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0) // these aren't currently used but left in for reference //#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) //#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0) -#define LD(reg,disp,base) do { \ +#define LD(reg,disp,base) do { \ count_ld();\ - ALUm(0x8b,reg,disp,base); \ - asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0) + ALUm(0x8b,reg,disp,base); \ + asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0) #define LDdm(reg,addr) do { \ count_ld(); \ @@ -468,63 +468,63 @@ namespace nanojit #define LDi(r,i) do { \ count_ld();\ - underrunProtect(5); \ - IMM32(i); \ + underrunProtect(5); \ + IMM32(i); \ NanoAssert(((unsigned)r)<8); \ - *(--_nIns) = (uint8_t) (0xb8 | (r) ); \ - asm_output("mov %s,%d",gpn(r),i); } while(0) + *(--_nIns) = (uint8_t) (0xb8 | (r) ); \ + asm_output("mov %s,%d",gpn(r),i); } while(0) #define ST(base,disp,reg) do { \ count_st();\ - ALUm(0x89,reg,disp,base); \ - asm_output("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0) + ALUm(0x89,reg,disp,base); \ + asm_output("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0) -#define STi(base,disp,imm) do { \ +#define STi(base,disp,imm) do { \ count_st();\ underrunProtect(12); \ - IMM32(imm); \ - MODRMm(0, disp, base); \ - *(--_nIns) = 0xc7; \ - asm_output("mov %d(%s),%d",disp,gpn(base),imm); } while(0) + IMM32(imm); \ + MODRMm(0, disp, base); \ + *(--_nIns) = 0xc7; \ + asm_output("mov %d(%s),%d",disp,gpn(base),imm); } while(0) #define RET() do { count_ret(); ALU0(0xc3); asm_output("ret"); } while(0) -#define NOP() do { count_alu(); ALU0(0x90); asm_output("nop"); } while(0) +#define NOP() do { count_alu(); ALU0(0x90); asm_output("nop"); } while(0) #define INT3() do { ALU0(0xcc); asm_output("int3"); } while(0) #define PUSHi(i) do { \ count_push();\ if (isS8(i)) { \ - underrunProtect(2); \ + underrunProtect(2); \ _nIns-=2; _nIns[0] = 0x6a; _nIns[1] = (uint8_t)(i); \ - asm_output("push %d",i); \ + asm_output("push %d",i); \ } else \ { PUSHi32(i); } } while(0) -#define PUSHi32(i) do { \ +#define PUSHi32(i) do { \ count_push();\ - underrunProtect(5); \ - IMM32(i); \ - *(--_nIns) = 0x68; \ - asm_output("push %d",i); } while(0) + underrunProtect(5); \ + IMM32(i); \ + *(--_nIns) = 0x68; \ + asm_output("push %d",i); } while(0) #define PUSHr(r) do { \ count_push();\ - underrunProtect(1); \ + underrunProtect(1); \ NanoAssert(((unsigned)r)<8); \ - *(--_nIns) = (uint8_t) ( 0x50 | (r) ); \ - asm_output("push %s",gpn(r)); } while(0) + *(--_nIns) = (uint8_t) ( 0x50 | (r) ); \ + asm_output("push %s",gpn(r)); } while(0) #define PUSHm(d,b) do { \ count_pushld();\ ALUm(0xff, 6, d, b); \ - asm_output("push %d(%s)",d,gpn(b)); } while(0) + asm_output("push %d(%s)",d,gpn(b)); } while(0) #define POPr(r) do { \ count_pop();\ - underrunProtect(1); \ + underrunProtect(1); \ NanoAssert(((unsigned)r)<8); \ *(--_nIns) = (uint8_t) ( 0x58 | (r) ); \ - asm_output("pop %s",gpn(r)); } while(0) + asm_output("pop %s",gpn(r)); } while(0) #define JCC32 0x0f #define JMP8 0xeb @@ -532,7 +532,7 @@ namespace nanojit #define JCC(o,t,isfar,n) do { \ count_jcc();\ - underrunProtect(6); \ + underrunProtect(6); \ intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ if (isS8(tt) && !isfar) { \ verbose_only( NIns* next = _nIns; (void)next; ) \ @@ -551,32 +551,32 @@ namespace nanojit #define JMP_long(t) do { \ count_jmp();\ - underrunProtect(5); \ + underrunProtect(5); \ intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ - JMP_long_nochk_offset(tt); \ + JMP_long_nochk_offset(tt); \ verbose_only( verbose_outputf("%010lx:", (unsigned long)_nIns); ) \ } while(0) -#define JMP(t) do { \ +#define JMP(t) do { \ count_jmp();\ - underrunProtect(5); \ + underrunProtect(5); \ intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \ if (isS8(tt)) { \ verbose_only( NIns* next = _nIns; (void)next; ) \ _nIns -= 2; \ _nIns[0] = JMP8; \ _nIns[1] = (uint8_t) ( (tt)&0xff ); \ - asm_output("jmp %p",(next+tt)); \ + asm_output("jmp %p",(next+tt)); \ } else { \ - JMP_long_nochk_offset(tt); \ + JMP_long_nochk_offset(tt); \ } } while(0) // this should only be used when you can guarantee there is enough room on the page #define JMP_long_nochk_offset(o) do {\ verbose_only( NIns* next = _nIns; (void)next; ) \ - IMM32((o)); \ - *(--_nIns) = JMP32; \ - asm_output("jmp %p",(next+(o))); } while(0) + IMM32((o)); \ + *(--_nIns) = JMP32; \ + asm_output("jmp %p",(next+(o))); } while(0) #define JMP_indirect(r) do { \ underrunProtect(2); \ @@ -614,18 +614,18 @@ namespace nanojit // sse instructions #define SSE(c,d,s) \ - underrunProtect(9); \ - MODRM((d),(s)); \ + underrunProtect(9); \ + MODRM((d),(s)); \ _nIns -= 3; \ - _nIns[0] = (uint8_t)(((c)>>16)&0xff); \ + _nIns[0] = (uint8_t)(((c)>>16)&0xff); \ _nIns[1] = (uint8_t)(((c)>>8)&0xff); \ _nIns[2] = (uint8_t)((c)&0xff) -#define SSEm(c,r,d,b) \ - underrunProtect(9); \ - MODRMm((r),(d),(b)); \ - _nIns -= 3; \ - _nIns[0] = (uint8_t)(((c)>>16)&0xff); \ +#define SSEm(c,r,d,b) \ + underrunProtect(9); \ + MODRMm((r),(d),(b)); \ + _nIns -= 3; \ + _nIns[0] = (uint8_t)(((c)>>16)&0xff); \ _nIns[1] = (uint8_t)(((c)>>8)&0xff); \ _nIns[2] = (uint8_t)((c)&0xff) @@ -656,13 +656,13 @@ namespace nanojit #define SSE_LDQ(r,d,b)do { \ count_ldq();\ SSEm(0xf30f7e, (r)&7, (d), (b)); \ - asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); \ + asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); \ } while(0) #define SSE_STQ(d,b,r)do { \ count_stq();\ SSEm(0x660fd6, (r)&7, (d), (b)); \ - asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \ + asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \ } while(0) #define SSE_CVTSI2SD(xr,gr) do{ \ @@ -688,7 +688,7 @@ namespace nanojit NanoAssert(_is_xmm_reg_(d)); \ SSE(0x660f6e, (d)&7, (s)&7); \ } \ - asm_output("movd %s,%s",gpn(d),gpn(s)); \ + asm_output("movd %s,%s",gpn(d),gpn(s)); \ } while(0) #define SSE_MOVSD(rd,rs) do{ \ @@ -702,7 +702,7 @@ namespace nanojit count_st();\ NanoAssert(_is_xmm_reg_(xrs) && _is_gp_reg_(b));\ SSEm(0x660f7e, (xrs)&7, d, b);\ - asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));\ + asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));\ } while(0) #define SSE_ADDSD(rd,rs) do{ \ @@ -775,80 +775,80 @@ namespace nanojit } while(0) // floating point unit -#define FPUc(o) \ - underrunProtect(2); \ - *(--_nIns) = ((uint8_t)(o)&0xff); \ +#define FPUc(o) \ + underrunProtect(2); \ + *(--_nIns) = ((uint8_t)(o)&0xff); \ *(--_nIns) = (uint8_t)(((o)>>8)&0xff) #define FPU(o,r) \ - underrunProtect(2); \ + underrunProtect(2); \ *(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7));\ *(--_nIns) = (uint8_t)(((o)>>8)&0xff) -#define FPUm(o,d,b) \ - underrunProtect(7); \ - MODRMm((uint8_t)(o), d, b); \ +#define FPUm(o,d,b) \ + underrunProtect(7); \ + MODRMm((uint8_t)(o), d, b); \ *(--_nIns) = (uint8_t)((o)>>8) #define TEST_AH(i) do { \ count_alu();\ - underrunProtect(3); \ + underrunProtect(3); \ *(--_nIns) = ((uint8_t)(i)); \ - *(--_nIns) = 0xc4; \ - *(--_nIns) = 0xf6; \ - asm_output("test ah, %d",i); } while(0) + *(--_nIns) = 0xc4; \ + *(--_nIns) = 0xf6; \ + asm_output("test ah, %d",i); } while(0) #define TEST_AX(i) do { \ count_fpu();\ - underrunProtect(5); \ - *(--_nIns) = (0); \ + underrunProtect(5); \ + *(--_nIns) = (0); \ *(--_nIns) = ((uint8_t)(i)); \ - *(--_nIns) = ((uint8_t)((i)>>8)); \ - *(--_nIns) = (0); \ - *(--_nIns) = 0xa9; \ - asm_output("test ax, %d",i); } while(0) + *(--_nIns) = ((uint8_t)((i)>>8)); \ + *(--_nIns) = (0); \ + *(--_nIns) = 0xa9; \ + asm_output("test ax, %d",i); } while(0) -#define FNSTSW_AX() do { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } while(0) -#define FCHS() do { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } while(0) -#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0) -#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0) -#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",fpn(r)); } while(0) -#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) +#define FNSTSW_AX() do { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } while(0) +#define FCHS() do { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } while(0) +#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0) +#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0) +#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",fpn(r)); } while(0) +#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) #define FSTPQ(d,b) FSTQ(1,d,b) -#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) -#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0) -#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0) -#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0) -#define FADD(d,b) do { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)",d,gpn(b)); } while(0) -#define FSUB(d,b) do { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)",d,gpn(b)); } while(0) -#define FSUBR(d,b) do { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } while(0) -#define FMUL(d,b) do { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)",d,gpn(b)); } while(0) -#define FDIV(d,b) do { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)",d,gpn(b)); } while(0) -#define FDIVR(d,b) do { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } while(0) -#define FINCSTP() do { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } while(0) -#define FSTP(r) do { count_fpu(); FPU(0xddd8, r&7); asm_output("fstp %s",fpn(r)); fpu_pop();} while(0) -#define FCOMP() do { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0) -#define FCOMPP() do { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0) -#define FLDr(r) do { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",fpn(r)); fpu_push(); } while(0) -#define EMMS() do { count_fpu(); FPUc(0x0f77); asm_output("emms"); } while (0) +#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0) +#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0) +#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0) +#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0) +#define FADD(d,b) do { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)",d,gpn(b)); } while(0) +#define FSUB(d,b) do { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)",d,gpn(b)); } while(0) +#define FSUBR(d,b) do { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } while(0) +#define FMUL(d,b) do { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)",d,gpn(b)); } while(0) +#define FDIV(d,b) do { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)",d,gpn(b)); } while(0) +#define FDIVR(d,b) do { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } while(0) +#define FINCSTP() do { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } while(0) +#define FSTP(r) do { count_fpu(); FPU(0xddd8, r&7); asm_output("fstp %s",fpn(r)); fpu_pop();} while(0) +#define FCOMP() do { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0) +#define FCOMPP() do { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0) +#define FLDr(r) do { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",fpn(r)); fpu_push(); } while(0) +#define EMMS() do { count_fpu(); FPUc(0x0f77); asm_output("emms"); } while (0) // standard direct call -#define CALL(c) do { \ +#define CALL(c) do { \ count_call();\ - underrunProtect(5); \ + underrunProtect(5); \ int offset = (c->_address) - ((int)_nIns); \ IMM32( (uint32_t)offset ); \ *(--_nIns) = 0xE8; \ - verbose_only(asm_output("call %s",(c->_name));) \ + verbose_only(asm_output("call %s",(c->_name));) \ debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\ } while (0) // indirect call thru register -#define CALLr(c,r) do { \ +#define CALLr(c,r) do { \ count_calli();\ underrunProtect(2);\ ALU(0xff, 2, (r));\ - verbose_only(asm_output("call %s",gpn(r));) \ + verbose_only(asm_output("call %s",gpn(r));) \ debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\ } while (0)