Bug 506139 - TM/nanojit: merge TR whitespace changes in the i386 backend. r=rreitmai.

This commit is contained in:
Nicholas Nethercote 2009-07-24 10:02:22 +10:00
parent cea112890b
commit 78d790d9f7
2 changed files with 219 additions and 223 deletions

View File

@ -102,7 +102,9 @@ namespace nanojit
// Reserve stackNeeded bytes, padded
// to preserve NJ_ALIGN_STACK-byte alignment.
if (amt)
{
SUBi(SP, amt);
}
verbose_only( outputAddr=true; asm_output("[frag entry]"); )
NIns *fragEntry = _nIns;
@ -145,6 +147,7 @@ namespace nanojit
lr->jmp = _nIns;
}
}
// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
MR(SP,FP);
@ -155,13 +158,11 @@ namespace nanojit
NIns *Assembler::genEpilogue()
{
RET();
if (!_thisfrag->lirbuf->explicitSavedRegs) {
for (int i = NumSavedRegs - 1; i >= 0; --i)
POPr(savedRegs[i]);
POPr(FP); // dummy
}
POPr(FP); // Restore caller's FP.
MR(SP,FP); // pop the stack frame
return _nIns;
@ -298,7 +299,7 @@ namespace nanojit
asm(
"bsf %1, %%eax\n\t"
"btr %%eax, %2\n\t"
"movl %%eax, %0\n\t"
"movl %%eax, %0\n\t"
: "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
#endif /* WIN32 */
return r;
@ -393,7 +394,7 @@ namespace nanojit
ST(FP, d, r);
}
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
void Assembler::asm_load(int d, Register r)
@ -563,13 +564,13 @@ namespace nanojit
if (config.sse2) {
Register rv = findRegFor(value, XmmRegs);
Register rb;
if (base->isop(LIR_ialloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
Register rb;
if (base->isop(LIR_ialloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
SSE_STQ(dr, rb, rv);
return;
}
@ -577,10 +578,10 @@ namespace nanojit
int da = findMemFor(value);
Register rb;
if (base->isop(LIR_ialloc)) {
rb = FP;
dr += findMemFor(base);
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
rb = findRegFor(base, GpRegs);
}
asm_mmq(rb, dr, FP, da);
return;
@ -645,12 +646,10 @@ namespace nanojit
NIns* at = 0;
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
if (condop >= LIR_feq && condop <= LIR_fge)
{
return asm_jmpcc(branchOnFalse, cond, targ);
}
// produce the branch
if (branchOnFalse)
{
@ -698,7 +697,6 @@ namespace nanojit
else //if (condop == LIR_uge)
JAE(targ, isfar);
}
at = _nIns;
asm_cmp(cond);
return at;
@ -709,14 +707,14 @@ namespace nanojit
LIns* diff = ins->oprnd1();
findSpecificRegFor(diff, EBX);
JMP(exit);
}
}
void Assembler::asm_cmp(LIns *cond)
{
LOpcode condop = cond->opcode();
// LIR_ov recycles the flags set by arithmetic ops
if ((condop == LIR_ov))
if (condop == LIR_ov)
return;
LInsp lhs = cond->oprnd1();
@ -866,7 +864,6 @@ namespace nanojit
Register rr = prepResultReg(ins, allow);
Reservation* rA = getresv(lhs);
Register ra;
// if this is last use of lhs in reg, we can re-use result reg
if (rA == 0 || (ra = rA->reg) == UnknownReg)
ra = findSpecificRegFor(lhs, rr);
@ -1074,17 +1071,17 @@ namespace nanojit
switch (condval->opcode())
{
// note that these are all opposites...
case LIR_eq: MRNE(rr, iffalsereg); break;
case LIR_eq: MRNE(rr, iffalsereg); break;
case LIR_ov: MRNO(rr, iffalsereg); break;
case LIR_lt: MRGE(rr, iffalsereg); break;
case LIR_lt: MRGE(rr, iffalsereg); break;
case LIR_le: MRG(rr, iffalsereg); break;
case LIR_gt: MRLE(rr, iffalsereg); break;
case LIR_gt: MRLE(rr, iffalsereg); break;
case LIR_ge: MRL(rr, iffalsereg); break;
case LIR_ult: MRAE(rr, iffalsereg); break;
case LIR_ule: MRA(rr, iffalsereg); break;
case LIR_ugt: MRBE(rr, iffalsereg); break;
case LIR_uge: MRB(rr, iffalsereg); break;
default: debug_only( NanoAssert(0); ) break;
case LIR_ult: MRAE(rr, iffalsereg); break;
case LIR_ule: MRA(rr, iffalsereg); break;
case LIR_ugt: MRBE(rr, iffalsereg); break;
case LIR_uge: MRB(rr, iffalsereg); break;
debug_only( default: NanoAssert(0); break; )
}
} else if (op == LIR_qcmov) {
NanoAssert(0);
@ -1489,12 +1486,12 @@ namespace nanojit
//
// gr is 0..2^32-1
//
// sub gr,0x80000000
// sub gr,0x80000000
//
// now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
// as before
//
// cvtsi2sd rr,gr
// cvtsi2sd rr,gr
//
// rr is now a double with the int value range
//
@ -1530,8 +1527,8 @@ namespace nanojit
Register gr = findRegFor(ins->oprnd1(), GpRegs);
NanoAssert(rr == FST0);
FILDQ(disp, base);
STi(base, disp+4, 0); // high 32 bits = 0
ST(base, disp, gr); // low 32 bits = unsigned value
STi(base, disp+4, 0); // high 32 bits = 0
ST(base, disp, gr); // low 32 bits = unsigned value
}
}
@ -1663,7 +1660,6 @@ namespace nanojit
evict(EAX);
TEST_AH(mask);
LAHF();
Reservation *rA, *rB;
findRegFor2(XmmRegs, lhs, rA, rhs, rB);
SSE_UCOMISD(rA->reg, rB->reg);

View File

@ -90,7 +90,7 @@
namespace nanojit
{
const int NJ_LOG2_PAGE_SIZE = 12; // 4K
const int NJ_LOG2_PAGE_SIZE = 12; // 4K
const int NJ_MAX_REGISTERS = 24; // gpregs, x87 regs, xmm regs
const int NJ_STACK_OFFSET = 0;
@ -160,14 +160,14 @@ namespace nanojit
static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;
#define _rmask_(r) (1<<(r))
#define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
#define _is_x87_reg_(r) ((_rmask_(r)&x87Regs)!=0)
#define _is_fp_reg_(r) ((_rmask_(r)&FpRegs)!=0)
#define _is_gp_reg_(r) ((_rmask_(r)&GpRegs)!=0)
#define _rmask_(r) (1<<(r))
#define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
#define _is_x87_reg_(r) ((_rmask_(r)&x87Regs)!=0)
#define _is_fp_reg_(r) ((_rmask_(r)&FpRegs)!=0)
#define _is_gp_reg_(r) ((_rmask_(r)&GpRegs)!=0)
#define nextreg(r) Register(r+1)
#define prevreg(r) Register(r-1)
#define nextreg(r) Register(r+1)
#define prevreg(r) Register(r-1)
verbose_only( extern const char* regNames[]; )
@ -177,7 +177,7 @@ namespace nanojit
#define DECLARE_PLATFORM_ASSEMBLER() \
const static Register argRegs[2], retRegs[2]; \
bool x87Dirty; \
bool x87Dirty; \
bool pad[3];\
void nativePageReset();\
void nativePageSetup();\
@ -187,25 +187,25 @@ namespace nanojit
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
#define IMM32(i) \
_nIns -= 4; \
_nIns -= 4; \
*((int32_t*)_nIns) = (int32_t)(i)
#define MODRMs(r,d,b,l,i) \
NanoAssert(unsigned(r)<8 && unsigned(b)<8 && unsigned(i)<8); \
if ((d) == 0 && (b) != EBP) { \
if ((d) == 0 && (b) != EBP) { \
_nIns -= 2; \
_nIns[0] = (uint8_t) ( 0<<6 | (r)<<3 | 4); \
_nIns[1] = (uint8_t) ((l)<<6 | (i)<<3 | (b)); \
} else if (isS8(d)) { \
_nIns -= 3; \
_nIns[0] = (uint8_t) ( 1<<6 | (r)<<3 | 4 ); \
_nIns[1] = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
_nIns[0] = (uint8_t) ( 0<<6 | (r)<<3 | 4); \
_nIns[1] = (uint8_t) ((l)<<6 | (i)<<3 | (b)); \
} else if (isS8(d)) { \
_nIns -= 3; \
_nIns[0] = (uint8_t) ( 1<<6 | (r)<<3 | 4 ); \
_nIns[1] = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
_nIns[2] = (uint8_t) (d); \
} else { \
IMM32(d); \
*(--_nIns) = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | 4 ); \
}
} else { \
IMM32(d); \
*(--_nIns) = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | 4 ); \
}
#define MODRMm(r,d,b) \
NanoAssert(unsigned(r)<8 && ((b)==UnknownReg || unsigned(b)<8)); \
@ -213,17 +213,17 @@ namespace nanojit
IMM32(d);\
*(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);\
} else if ((b) == ESP) { \
MODRMs(r, d, b, 0, (Register)4); \
} \
MODRMs(r, d, b, 0, (Register)4); \
} \
else if ( (d) == 0 && (b) != EBP) { \
*(--_nIns) = (uint8_t) ( 0<<6 | (r)<<3 | (b) ); \
} else if (isS8(d)) { \
*(--_nIns) = (uint8_t) (d); \
*(--_nIns) = (uint8_t) ( 1<<6 | (r)<<3 | (b) ); \
} else { \
IMM32(d); \
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | (b) ); \
}
*(--_nIns) = (uint8_t) ( 0<<6 | (r)<<3 | (b) ); \
} else if (isS8(d)) { \
*(--_nIns) = (uint8_t) (d); \
*(--_nIns) = (uint8_t) ( 1<<6 | (r)<<3 | (b) ); \
} else { \
IMM32(d); \
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | (b) ); \
}
#define MODRMSIB(reg,base,index,scale,disp) \
if (disp != 0 || base == EBP) { \
@ -252,13 +252,13 @@ namespace nanojit
NanoAssert(((unsigned)(d))<8 && ((unsigned)(s))<8); \
*(--_nIns) = (uint8_t) ( 3<<6|(d)<<3|(s) )
#define ALU0(o) \
#define ALU0(o) \
underrunProtect(1);\
*(--_nIns) = (uint8_t) (o)
#define ALUm(c,r,d,b) \
#define ALUm(c,r,d,b) \
underrunProtect(8); \
MODRMm(r,d,b); \
MODRMm(r,d,b); \
*(--_nIns) = uint8_t(c)
#define ALUdm(c,r,addr) \
@ -271,9 +271,9 @@ namespace nanojit
MODRMSIB(r,base,index,scale,disp); \
*(--_nIns) = uint8_t(c)
#define ALUm16(c,r,d,b) \
#define ALUm16(c,r,d,b) \
underrunProtect(9); \
MODRMm(r,d,b); \
MODRMm(r,d,b); \
*(--_nIns) = uint8_t(c);\
*(--_nIns) = 0x66
@ -301,21 +301,21 @@ namespace nanojit
*(--_nIns) = (uint8_t) (c)
#define ALUi(c,r,i) \
underrunProtect(6); \
underrunProtect(6); \
NanoAssert(unsigned(r)<8);\
if (isS8(i)) { \
if (isS8(i)) { \
*(--_nIns) = uint8_t(i); \
MODRM((c>>3),(r)); \
*(--_nIns) = uint8_t(0x83); \
} else { \
IMM32(i); \
if ( (r) == EAX) { \
*(--_nIns) = (uint8_t) (c); \
} else { \
} else { \
IMM32(i); \
if ( (r) == EAX) { \
*(--_nIns) = (uint8_t) (c); \
} else { \
MODRM((c>>3),(r)); \
*(--_nIns) = uint8_t(0x81); \
} \
}
} \
}
#define ALUmi(c,d,b,i) \
underrunProtect(10); \
@ -332,25 +332,25 @@ namespace nanojit
#define ALU2(c,d,s) \
underrunProtect(3); \
MODRM((d),(s)); \
MODRM((d),(s)); \
_nIns -= 2; \
_nIns[0] = (uint8_t) ( ((c)>>8) ); \
_nIns[1] = (uint8_t) ( (c) )
#define LAHF() do { count_alu(); ALU0(0x9F); asm_output("lahf"); } while(0)
#define SAHF() do { count_alu(); ALU0(0x9E); asm_output("sahf"); } while(0)
#define OR(l,r) do { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } while(0)
#define AND(l,r) do { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } while(0)
#define XOR(l,r) do { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } while(0)
#define ADD(l,r) do { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } while(0)
#define SUB(l,r) do { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } while(0)
#define MUL(l,r) do { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } while(0)
#define LAHF() do { count_alu(); ALU0(0x9F); asm_output("lahf"); } while(0)
#define SAHF() do { count_alu(); ALU0(0x9E); asm_output("sahf"); } while(0)
#define OR(l,r) do { count_alu(); ALU(0x0b, (l),(r)); asm_output("or %s,%s",gpn(l),gpn(r)); } while(0)
#define AND(l,r) do { count_alu(); ALU(0x23, (l),(r)); asm_output("and %s,%s",gpn(l),gpn(r)); } while(0)
#define XOR(l,r) do { count_alu(); ALU(0x33, (l),(r)); asm_output("xor %s,%s",gpn(l),gpn(r)); } while(0)
#define ADD(l,r) do { count_alu(); ALU(0x03, (l),(r)); asm_output("add %s,%s",gpn(l),gpn(r)); } while(0)
#define SUB(l,r) do { count_alu(); ALU(0x2b, (l),(r)); asm_output("sub %s,%s",gpn(l),gpn(r)); } while(0)
#define MUL(l,r) do { count_alu(); ALU2(0x0faf,(l),(r)); asm_output("mul %s,%s",gpn(l),gpn(r)); } while(0)
#define DIV(r) do { count_alu(); ALU(0xf7, (Register)7,(r)); asm_output("idiv edx:eax, %s",gpn(r)); } while(0)
#define NOT(r) do { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } while(0)
#define NEG(r) do { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } while(0)
#define SHR(r,s) do { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } while(0)
#define SAR(r,s) do { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } while(0)
#define SHL(r,s) do { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } while(0)
#define NOT(r) do { count_alu(); ALU(0xf7, (Register)2,(r)); asm_output("not %s",gpn(r)); } while(0)
#define NEG(r) do { count_alu(); ALU(0xf7, (Register)3,(r)); asm_output("neg %s",gpn(r)); } while(0)
#define SHR(r,s) do { count_alu(); ALU(0xd3, (Register)5,(r)); asm_output("shr %s,%s",gpn(r),gpn(s)); } while(0)
#define SAR(r,s) do { count_alu(); ALU(0xd3, (Register)7,(r)); asm_output("sar %s,%s",gpn(r),gpn(s)); } while(0)
#define SHL(r,s) do { count_alu(); ALU(0xd3, (Register)4,(r)); asm_output("shl %s,%s",gpn(r),gpn(s)); } while(0)
#define SHIFT(c,r,i) \
underrunProtect(3);\
@ -358,64 +358,64 @@ namespace nanojit
MODRM((Register)c,r);\
*--_nIns = 0xc1;
#define SHLi(r,i) do { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } while(0)
#define SHRi(r,i) do { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } while(0)
#define SARi(r,i) do { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } while(0)
#define SHLi(r,i) do { count_alu(); SHIFT(4,r,i); asm_output("shl %s,%d", gpn(r),i); } while(0)
#define SHRi(r,i) do { count_alu(); SHIFT(5,r,i); asm_output("shr %s,%d", gpn(r),i); } while(0)
#define SARi(r,i) do { count_alu(); SHIFT(7,r,i); asm_output("sar %s,%d", gpn(r),i); } while(0)
#define MOVZX8(d,s) do { count_alu(); ALU2(0x0fb6,d,s); asm_output("movzx %s,%s", gpn(d),gpn(s)); } while(0)
#define SUBi(r,i) do { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } while(0)
#define ADDi(r,i) do { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } while(0)
#define ANDi(r,i) do { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } while(0)
#define ORi(r,i) do { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } while(0)
#define XORi(r,i) do { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } while(0)
#define SUBi(r,i) do { count_alu(); ALUi(0x2d,r,i); asm_output("sub %s,%d",gpn(r),i); } while(0)
#define ADDi(r,i) do { count_alu(); ALUi(0x05,r,i); asm_output("add %s,%d",gpn(r),i); } while(0)
#define ANDi(r,i) do { count_alu(); ALUi(0x25,r,i); asm_output("and %s,%d",gpn(r),i); } while(0)
#define ORi(r,i) do { count_alu(); ALUi(0x0d,r,i); asm_output("or %s,%d",gpn(r),i); } while(0)
#define XORi(r,i) do { count_alu(); ALUi(0x35,r,i); asm_output("xor %s,%d",gpn(r),i); } while(0)
#define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } while(0)
#define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } while(0)
#define TEST(d,s) do { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } while(0)
#define CMP(l,r) do { count_alu(); ALU(0x3b, (l),(r)); asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0)
#define CMPi(r,i) do { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } while(0)
#define TEST(d,s) do { count_alu(); ALU(0x85,d,s); asm_output("test %s,%s",gpn(d),gpn(s)); } while(0)
#define CMP(l,r) do { count_alu(); ALU(0x3b, (l),(r)); asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0)
#define CMPi(r,i) do { count_alu(); ALUi(0x3d,r,i); asm_output("cmp %s,%d",gpn(r),i); } while(0)
#define MR(d,s) do { count_mov(); ALU(0x8b,d,s); asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0)
#define LEA(r,d,b) do { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0)
#define MR(d,s) do { count_mov(); ALU(0x8b,d,s); asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0)
#define LEA(r,d,b) do { count_alu(); ALUm(0x8d, r,d,b); asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0)
// lea %r, d(%i*4)
// This addressing mode is not supported by the MODRMSIB macro.
#define LEAmi4(r,d,i) do { count_alu(); IMM32(d); *(--_nIns) = (2<<6)|(i<<3)|5; *(--_nIns) = (0<<6)|(r<<3)|4; *(--_nIns) = 0x8d; asm_output("lea %s, %p(%s*4)", gpn(r), (void*)d, gpn(i)); } while(0)
// Sign-extend EAX into EDX (the effect of x86 "cdq") using two ordinary ALU ops.
// The emitters in this file write code backwards by pre-decrementing _nIns, so
// although SARi is invoked first, the generated code executes as
// "mov edx,eax" followed by "sar edx,31".
#define CDQ() do { SARi(EDX, 31); MR(EDX, EAX); } while(0)
#define SETE(r) do { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s",gpn(r)); } while(0)
#define SETNP(r) do { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } while(0)
#define SETL(r) do { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s",gpn(r)); } while(0)
#define SETLE(r) do { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } while(0)
#define SETG(r) do { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s",gpn(r)); } while(0)
#define SETGE(r) do { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } while(0)
#define SETB(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s",gpn(r)); } while(0)
#define SETBE(r) do { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } while(0)
#define SETA(r) do { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s",gpn(r)); } while(0)
#define SETAE(r) do { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } while(0)
// SETcc is encoded as 0F 90+cc. Overflow (O) is condition code 0, so SETO is
// 0F 90; 0F 92 is SETB/SETC and would test the carry flag instead.
#define SETO(r) do { count_alu(); ALU2(0x0f90,(r),(r)); asm_output("seto %s",gpn(r)); } while(0)
#define SETE(r) do { count_alu(); ALU2(0x0f94,(r),(r)); asm_output("sete %s",gpn(r)); } while(0)
#define SETNP(r) do { count_alu(); ALU2(0x0f9B,(r),(r)); asm_output("setnp %s",gpn(r)); } while(0)
#define SETL(r) do { count_alu(); ALU2(0x0f9C,(r),(r)); asm_output("setl %s",gpn(r)); } while(0)
#define SETLE(r) do { count_alu(); ALU2(0x0f9E,(r),(r)); asm_output("setle %s",gpn(r)); } while(0)
#define SETG(r) do { count_alu(); ALU2(0x0f9F,(r),(r)); asm_output("setg %s",gpn(r)); } while(0)
#define SETGE(r) do { count_alu(); ALU2(0x0f9D,(r),(r)); asm_output("setge %s",gpn(r)); } while(0)
#define SETB(r) do { count_alu(); ALU2(0x0f92,(r),(r)); asm_output("setb %s",gpn(r)); } while(0)
#define SETBE(r) do { count_alu(); ALU2(0x0f96,(r),(r)); asm_output("setbe %s",gpn(r)); } while(0)
#define SETA(r) do { count_alu(); ALU2(0x0f97,(r),(r)); asm_output("seta %s",gpn(r)); } while(0)
#define SETAE(r) do { count_alu(); ALU2(0x0f93,(r),(r)); asm_output("setae %s",gpn(r)); } while(0)
// SETO: 0F 90 (overflow flag); see the SETcc opcode table in the Intel SDM.
#define SETO(r) do { count_alu(); ALU2(0x0f90,(r),(r)); asm_output("seto %s",gpn(r)); } while(0)
#define MREQ(dr,sr) do { count_alu(); ALU2(0x0f44,dr,sr); asm_output("cmove %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRNE(dr,sr) do { count_alu(); ALU2(0x0f45,dr,sr); asm_output("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRL(dr,sr) do { count_alu(); ALU2(0x0f4C,dr,sr); asm_output("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRLE(dr,sr) do { count_alu(); ALU2(0x0f4E,dr,sr); asm_output("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRG(dr,sr) do { count_alu(); ALU2(0x0f4F,dr,sr); asm_output("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRGE(dr,sr) do { count_alu(); ALU2(0x0f4D,dr,sr); asm_output("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRB(dr,sr) do { count_alu(); ALU2(0x0f42,dr,sr); asm_output("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRBE(dr,sr) do { count_alu(); ALU2(0x0f46,dr,sr); asm_output("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRA(dr,sr) do { count_alu(); ALU2(0x0f47,dr,sr); asm_output("cmova %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRAE(dr,sr) do { count_alu(); ALU2(0x0f43,dr,sr); asm_output("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRNO(dr,sr) do { count_alu(); ALU2(0x0f41,dr,sr); asm_output("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MREQ(dr,sr) do { count_alu(); ALU2(0x0f44,dr,sr); asm_output("cmove %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRNE(dr,sr) do { count_alu(); ALU2(0x0f45,dr,sr); asm_output("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRL(dr,sr) do { count_alu(); ALU2(0x0f4C,dr,sr); asm_output("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRLE(dr,sr) do { count_alu(); ALU2(0x0f4E,dr,sr); asm_output("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRG(dr,sr) do { count_alu(); ALU2(0x0f4F,dr,sr); asm_output("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRGE(dr,sr) do { count_alu(); ALU2(0x0f4D,dr,sr); asm_output("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRB(dr,sr) do { count_alu(); ALU2(0x0f42,dr,sr); asm_output("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRBE(dr,sr) do { count_alu(); ALU2(0x0f46,dr,sr); asm_output("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRA(dr,sr) do { count_alu(); ALU2(0x0f47,dr,sr); asm_output("cmova %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRAE(dr,sr) do { count_alu(); ALU2(0x0f43,dr,sr); asm_output("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0)
#define MRNO(dr,sr) do { count_alu(); ALU2(0x0f41,dr,sr); asm_output("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0)
// these aren't currently used but left in for reference
//#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
//#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
#define LD(reg,disp,base) do { \
#define LD(reg,disp,base) do { \
count_ld();\
ALUm(0x8b,reg,disp,base); \
asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0)
ALUm(0x8b,reg,disp,base); \
asm_output("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0)
#define LDdm(reg,addr) do { \
count_ld(); \
@ -468,63 +468,63 @@ namespace nanojit
#define LDi(r,i) do { \
count_ld();\
underrunProtect(5); \
IMM32(i); \
underrunProtect(5); \
IMM32(i); \
NanoAssert(((unsigned)r)<8); \
*(--_nIns) = (uint8_t) (0xb8 | (r) ); \
asm_output("mov %s,%d",gpn(r),i); } while(0)
*(--_nIns) = (uint8_t) (0xb8 | (r) ); \
asm_output("mov %s,%d",gpn(r),i); } while(0)
#define ST(base,disp,reg) do { \
count_st();\
ALUm(0x89,reg,disp,base); \
asm_output("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
ALUm(0x89,reg,disp,base); \
asm_output("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define STi(base,disp,imm) do { \
#define STi(base,disp,imm) do { \
count_st();\
underrunProtect(12); \
IMM32(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc7; \
asm_output("mov %d(%s),%d",disp,gpn(base),imm); } while(0)
IMM32(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc7; \
asm_output("mov %d(%s),%d",disp,gpn(base),imm); } while(0)
#define RET() do { count_ret(); ALU0(0xc3); asm_output("ret"); } while(0)
#define NOP() do { count_alu(); ALU0(0x90); asm_output("nop"); } while(0)
#define NOP() do { count_alu(); ALU0(0x90); asm_output("nop"); } while(0)
#define INT3() do { ALU0(0xcc); asm_output("int3"); } while(0)
#define PUSHi(i) do { \
count_push();\
if (isS8(i)) { \
underrunProtect(2); \
underrunProtect(2); \
_nIns-=2; _nIns[0] = 0x6a; _nIns[1] = (uint8_t)(i); \
asm_output("push %d",i); \
asm_output("push %d",i); \
} else \
{ PUSHi32(i); } } while(0)
#define PUSHi32(i) do { \
#define PUSHi32(i) do { \
count_push();\
underrunProtect(5); \
IMM32(i); \
*(--_nIns) = 0x68; \
asm_output("push %d",i); } while(0)
underrunProtect(5); \
IMM32(i); \
*(--_nIns) = 0x68; \
asm_output("push %d",i); } while(0)
#define PUSHr(r) do { \
count_push();\
underrunProtect(1); \
underrunProtect(1); \
NanoAssert(((unsigned)r)<8); \
*(--_nIns) = (uint8_t) ( 0x50 | (r) ); \
asm_output("push %s",gpn(r)); } while(0)
*(--_nIns) = (uint8_t) ( 0x50 | (r) ); \
asm_output("push %s",gpn(r)); } while(0)
#define PUSHm(d,b) do { \
count_pushld();\
ALUm(0xff, 6, d, b); \
asm_output("push %d(%s)",d,gpn(b)); } while(0)
asm_output("push %d(%s)",d,gpn(b)); } while(0)
#define POPr(r) do { \
count_pop();\
underrunProtect(1); \
underrunProtect(1); \
NanoAssert(((unsigned)r)<8); \
*(--_nIns) = (uint8_t) ( 0x58 | (r) ); \
asm_output("pop %s",gpn(r)); } while(0)
asm_output("pop %s",gpn(r)); } while(0)
#define JCC32 0x0f
#define JMP8 0xeb
@ -532,7 +532,7 @@ namespace nanojit
#define JCC(o,t,isfar,n) do { \
count_jcc();\
underrunProtect(6); \
underrunProtect(6); \
intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
if (isS8(tt) && !isfar) { \
verbose_only( NIns* next = _nIns; (void)next; ) \
@ -551,32 +551,32 @@ namespace nanojit
#define JMP_long(t) do { \
count_jmp();\
underrunProtect(5); \
underrunProtect(5); \
intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
JMP_long_nochk_offset(tt); \
JMP_long_nochk_offset(tt); \
verbose_only( verbose_outputf("%010lx:", (unsigned long)_nIns); ) \
} while(0)
#define JMP(t) do { \
#define JMP(t) do { \
count_jmp();\
underrunProtect(5); \
underrunProtect(5); \
intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
if (isS8(tt)) { \
verbose_only( NIns* next = _nIns; (void)next; ) \
_nIns -= 2; \
_nIns[0] = JMP8; \
_nIns[1] = (uint8_t) ( (tt)&0xff ); \
asm_output("jmp %p",(next+tt)); \
asm_output("jmp %p",(next+tt)); \
} else { \
JMP_long_nochk_offset(tt); \
JMP_long_nochk_offset(tt); \
} } while(0)
// this should only be used when you can guarantee there is enough room on the page
#define JMP_long_nochk_offset(o) do {\
verbose_only( NIns* next = _nIns; (void)next; ) \
IMM32((o)); \
*(--_nIns) = JMP32; \
asm_output("jmp %p",(next+(o))); } while(0)
IMM32((o)); \
*(--_nIns) = JMP32; \
asm_output("jmp %p",(next+(o))); } while(0)
#define JMP_indirect(r) do { \
underrunProtect(2); \
@ -614,18 +614,18 @@ namespace nanojit
// sse instructions
#define SSE(c,d,s) \
underrunProtect(9); \
MODRM((d),(s)); \
underrunProtect(9); \
MODRM((d),(s)); \
_nIns -= 3; \
_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
_nIns[1] = (uint8_t)(((c)>>8)&0xff); \
_nIns[2] = (uint8_t)((c)&0xff)
#define SSEm(c,r,d,b) \
underrunProtect(9); \
MODRMm((r),(d),(b)); \
_nIns -= 3; \
_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
#define SSEm(c,r,d,b) \
underrunProtect(9); \
MODRMm((r),(d),(b)); \
_nIns -= 3; \
_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
_nIns[1] = (uint8_t)(((c)>>8)&0xff); \
_nIns[2] = (uint8_t)((c)&0xff)
@ -656,13 +656,13 @@ namespace nanojit
#define SSE_LDQ(r,d,b)do { \
count_ldq();\
SSEm(0xf30f7e, (r)&7, (d), (b)); \
asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); \
asm_output("movq %s,%d(%s)",gpn(r),d,gpn(b)); \
} while(0)
#define SSE_STQ(d,b,r)do { \
count_stq();\
SSEm(0x660fd6, (r)&7, (d), (b)); \
asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \
asm_output("movq %d(%s),%s",(d),gpn(b),gpn(r)); \
} while(0)
#define SSE_CVTSI2SD(xr,gr) do{ \
@ -688,7 +688,7 @@ namespace nanojit
NanoAssert(_is_xmm_reg_(d)); \
SSE(0x660f6e, (d)&7, (s)&7); \
} \
asm_output("movd %s,%s",gpn(d),gpn(s)); \
asm_output("movd %s,%s",gpn(d),gpn(s)); \
} while(0)
#define SSE_MOVSD(rd,rs) do{ \
@ -702,7 +702,7 @@ namespace nanojit
count_st();\
NanoAssert(_is_xmm_reg_(xrs) && _is_gp_reg_(b));\
SSEm(0x660f7e, (xrs)&7, d, b);\
asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
asm_output("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
} while(0)
#define SSE_ADDSD(rd,rs) do{ \
@ -775,80 +775,80 @@ namespace nanojit
} while(0)
// floating point unit
#define FPUc(o) \
underrunProtect(2); \
*(--_nIns) = ((uint8_t)(o)&0xff); \
#define FPUc(o) \
underrunProtect(2); \
*(--_nIns) = ((uint8_t)(o)&0xff); \
*(--_nIns) = (uint8_t)(((o)>>8)&0xff)
#define FPU(o,r) \
underrunProtect(2); \
underrunProtect(2); \
*(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7));\
*(--_nIns) = (uint8_t)(((o)>>8)&0xff)
#define FPUm(o,d,b) \
underrunProtect(7); \
MODRMm((uint8_t)(o), d, b); \
#define FPUm(o,d,b) \
underrunProtect(7); \
MODRMm((uint8_t)(o), d, b); \
*(--_nIns) = (uint8_t)((o)>>8)
#define TEST_AH(i) do { \
count_alu();\
underrunProtect(3); \
underrunProtect(3); \
*(--_nIns) = ((uint8_t)(i)); \
*(--_nIns) = 0xc4; \
*(--_nIns) = 0xf6; \
asm_output("test ah, %d",i); } while(0)
*(--_nIns) = 0xc4; \
*(--_nIns) = 0xf6; \
asm_output("test ah, %d",i); } while(0)
#define TEST_AX(i) do { \
count_fpu();\
underrunProtect(5); \
*(--_nIns) = (0); \
underrunProtect(5); \
*(--_nIns) = (0); \
*(--_nIns) = ((uint8_t)(i)); \
*(--_nIns) = ((uint8_t)((i)>>8)); \
*(--_nIns) = (0); \
*(--_nIns) = 0xa9; \
asm_output("test ax, %d",i); } while(0)
*(--_nIns) = ((uint8_t)((i)>>8)); \
*(--_nIns) = (0); \
*(--_nIns) = 0xa9; \
asm_output("test ax, %d",i); } while(0)
#define FNSTSW_AX() do { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } while(0)
#define FCHS() do { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } while(0)
#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0)
#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0)
#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",fpn(r)); } while(0)
#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FNSTSW_AX() do { count_fpu(); FPUc(0xdfe0); asm_output("fnstsw_ax"); } while(0)
#define FCHS() do { count_fpu(); FPUc(0xd9e0); asm_output("fchs"); } while(0)
#define FLD1() do { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); } while(0)
#define FLDZ() do { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); } while(0)
#define FFREE(r) do { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",fpn(r)); } while(0)
#define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b); asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FSTPQ(d,b) FSTQ(1,d,b)
#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0)
#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0)
#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0)
#define FADD(d,b) do { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)",d,gpn(b)); } while(0)
#define FSUB(d,b) do { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)",d,gpn(b)); } while(0)
#define FSUBR(d,b) do { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } while(0)
#define FMUL(d,b) do { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)",d,gpn(b)); } while(0)
#define FDIV(d,b) do { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)",d,gpn(b)); } while(0)
#define FDIVR(d,b) do { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } while(0)
#define FINCSTP() do { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } while(0)
#define FSTP(r) do { count_fpu(); FPU(0xddd8, r&7); asm_output("fstp %s",fpn(r)); fpu_pop();} while(0)
#define FCOMP() do { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0)
#define FCOMPP() do { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
#define FLDr(r) do { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",fpn(r)); fpu_push(); } while(0)
#define EMMS() do { count_fpu(); FPUc(0x0f77); asm_output("emms"); } while (0)
#define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
#define FLDQ(d,b) do { count_ldq(); FPUm(0xdd00, d, b); asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0)
#define FILDQ(d,b) do { count_fpuld(); FPUm(0xdf05, d, b); asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0)
#define FILD(d,b) do { count_fpuld(); FPUm(0xdb00, d, b); asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0)
#define FADD(d,b) do { count_fpu(); FPUm(0xdc00, d, b); asm_output("fadd %d(%s)",d,gpn(b)); } while(0)
#define FSUB(d,b) do { count_fpu(); FPUm(0xdc04, d, b); asm_output("fsub %d(%s)",d,gpn(b)); } while(0)
#define FSUBR(d,b) do { count_fpu(); FPUm(0xdc05, d, b); asm_output("fsubr %d(%s)",d,gpn(b)); } while(0)
#define FMUL(d,b) do { count_fpu(); FPUm(0xdc01, d, b); asm_output("fmul %d(%s)",d,gpn(b)); } while(0)
#define FDIV(d,b) do { count_fpu(); FPUm(0xdc06, d, b); asm_output("fdiv %d(%s)",d,gpn(b)); } while(0)
#define FDIVR(d,b) do { count_fpu(); FPUm(0xdc07, d, b); asm_output("fdivr %d(%s)",d,gpn(b)); } while(0)
#define FINCSTP() do { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); } while(0)
#define FSTP(r) do { count_fpu(); FPU(0xddd8, r&7); asm_output("fstp %s",fpn(r)); fpu_pop();} while(0)
#define FCOMP() do { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0)
#define FCOMPP() do { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
#define FLDr(r) do { count_ldq(); FPU(0xd9c0,r); asm_output("fld %s",fpn(r)); fpu_push(); } while(0)
#define EMMS() do { count_fpu(); FPUc(0x0f77); asm_output("emms"); } while (0)
// standard direct call
#define CALL(c) do { \
#define CALL(c) do { \
count_call();\
underrunProtect(5); \
underrunProtect(5); \
int offset = (c->_address) - ((int)_nIns); \
IMM32( (uint32_t)offset ); \
*(--_nIns) = 0xE8; \
verbose_only(asm_output("call %s",(c->_name));) \
verbose_only(asm_output("call %s",(c->_name));) \
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
} while (0)
// indirect call thru register
#define CALLr(c,r) do { \
#define CALLr(c,r) do { \
count_calli();\
underrunProtect(2);\
ALU(0xff, 2, (r));\
verbose_only(asm_output("call %s",gpn(r));) \
verbose_only(asm_output("call %s",gpn(r));) \
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
} while (0)