Bug 580515 - TM: LIR_cmovd mishandled with X86_FORCE_SSE2=no. r=edwsmith.

--HG--
extra : convert_revision : 4effe362e918583ec7b98b08da24f02c0833d306
Nicholas Nethercote 2010-12-01 14:23:44 -08:00
parent 884504296d
commit 18402713d9
4 changed files with 142 additions and 108 deletions


@@ -1173,20 +1173,24 @@ namespace nanojit
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
if (ins->isop(LIR_cmovd)) {
// See Nativei386.cpp:asm_cmov() for an explanation of the subtleties here.
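// In short: there is no conditional move between XMM registers, so the
// result is selected with a branch around a copy. Only the jcc is emitted
// here via asm_branch_helper(); the flag-setting compare comes from the
// asm_cmp(cond) call at the end of this block, so any register evictions it
// triggers land before the whole sequence rather than between the copies
// and the jcc.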
NIns* target = _nIns;
asm_nongp_copy(rr, rf);
asm_branch(false, cond, target);
asm_branch_helper(false, cond, target);
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
if (rr != rt)
asm_nongp_copy(rr, rt);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(cond);
return;
}
@@ -1194,8 +1198,8 @@ namespace nanojit
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
// WARNING: We cannot generate any code that affects the condition
// codes between the MRcc generation here and the asm_cmp() call
// below. See asm_cmp() for more details.
// codes between the MRcc generation here and the asm_cmpi() call
// below. See asm_cmpi() for more details.
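// Because code is assembled backwards, the test/cmp emitted by asm_cmpi()
// below lands ahead of the cmovcc in the executed stream, and anything
// emitted in between (typically register restores) ends up between the two.
// A rough forward-order sketch, with purely illustrative registers:
//   cmp    r8d, r9d        ; emitted by asm_cmpi(cond)
//   mov    rax, rcx        ; rr := rt (flags-safe)
//   cmovne rax, rdx        ; rr := rf when the condition does not hold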
LOpcode condop = cond->opcode();
if (ins->isop(LIR_cmovi)) {
switch (condop) {
@@ -1234,30 +1238,36 @@ namespace nanojit
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(cond);
asm_cmpi(cond);
}
NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) {
NanoAssert(cond->isCmp());
LOpcode condop = cond->opcode();
NIns* Assembler::asm_branch(bool onFalse, LIns* cond, NIns* target) {
NIns* patch = asm_branch_helper(onFalse, cond, target);
asm_cmp(cond);
return patch;
}
NIns* Assembler::asm_branch_helper(bool onFalse, LIns *cond, NIns *target) {
if (target && !isTargetWithinS32(target)) {
// conditional jumps beyond 32bit range, so invert the branch/compare
// and emit an unconditional jump to the target
// A conditional jump beyond 32-bit range, so invert the
// branch/compare and emit an unconditional jump to the target:
// j(inverted) B1
// jmp target
// B1:
NIns* shortTarget = _nIns;
JMP(target);
target = shortTarget;
onFalse = !onFalse;
}
if (isCmpDOpcode(condop))
return asm_branchd(onFalse, cond, target);
return isCmpDOpcode(cond->opcode())
? asm_branchd_helper(onFalse, cond, target)
: asm_branchi_helper(onFalse, cond, target);
}
NIns* Assembler::asm_branchi_helper(bool onFalse, LIns *cond, NIns *target) {
// We must ensure there's room for the instruction before calculating
// the offset. And the offset determines the opcode (8bit or 32bit).
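// For example, the short form "je rel8" encodes in 2 bytes while the near
// form "je rel32" (0F 84 + imm32) takes 6, so the displacement computed
// from _nIns is only meaningful once space for the chosen encoding has been
// reserved.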
LOpcode condop = cond->opcode();
if (target && isTargetWithinS8(target)) {
if (onFalse) {
switch (condop) {
@@ -1315,9 +1325,7 @@ namespace nanojit
}
}
}
NIns *patch = _nIns; // address of instruction to patch
asm_cmp(cond);
return patch;
return _nIns; // address of instruction to patch
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
@@ -1334,13 +1342,17 @@ namespace nanojit
return _nIns;
}
void Assembler::asm_cmp(LIns *cond) {
isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
}
// WARNING: this function cannot generate code that will affect the
// condition codes prior to the generation of the test/cmp. See
// Nativei386.cpp:asm_cmp() for details.
void Assembler::asm_cmp(LIns *cond) {
// Nativei386.cpp:asm_cmpi() for details.
void Assembler::asm_cmpi(LIns *cond) {
LIns *b = cond->oprnd2();
if (isImm32(b)) {
asm_cmp_imm(cond);
asm_cmpi_imm(cond);
return;
}
LIns *a = cond->oprnd1();
@@ -1361,7 +1373,7 @@ namespace nanojit
}
}
void Assembler::asm_cmp_imm(LIns *cond) {
void Assembler::asm_cmpi_imm(LIns *cond) {
LOpcode condop = cond->opcode();
LIns *a = cond->oprnd1();
LIns *b = cond->oprnd2();
@@ -1399,11 +1411,9 @@ namespace nanojit
// LIR_jt jae ja swap+jae swap+ja jp over je
// LIR_jf jb jbe swap+jb swap+jbe jne+jp
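// ucomisd sets the flags the way an unsigned integer compare would: ZF=1 on
// equal, CF=1 on less-than, and ZF=PF=CF=1 when either operand is NaN --
// which is why the unsigned jcc forms and the extra parity checks above are
// used.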
NIns* Assembler::asm_branchd(bool onFalse, LIns *cond, NIns *target) {
NIns* Assembler::asm_branchd_helper(bool onFalse, LIns *cond, NIns *target) {
LOpcode condop = cond->opcode();
NIns *patch;
LIns *a = cond->oprnd1();
LIns *b = cond->oprnd2();
if (condop == LIR_eqd) {
if (onFalse) {
// branch if unordered or !=
@@ -1422,34 +1432,23 @@ namespace nanojit
}
}
else {
if (condop == LIR_ltd) {
condop = LIR_gtd;
LIns *t = a; a = b; b = t;
} else if (condop == LIR_led) {
condop = LIR_ged;
LIns *t = a; a = b; b = t;
}
if (condop == LIR_gtd) {
if (onFalse)
JBE(8, target);
else
JA(8, target);
} else { // LIR_ged
if (onFalse)
JB(8, target);
else
JAE(8, target);
// LIR_ltd and LIR_gtd are handled by the same case because
// asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
// LIR_led/LIR_ged.
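// For example, for LIR_ltd(a,b) the ucomisd emitted by asm_cmpd() ends up
// comparing b against a, so JA (taken when b > a) is the right test here
// as well.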
switch (condop) {
case LIR_ltd:
case LIR_gtd: if (onFalse) JBE(8, target); else JA(8, target); break;
case LIR_led:
case LIR_ged: if (onFalse) JB(8, target); else JAE(8, target); break;
default: NanoAssert(0); break;
}
patch = _nIns;
}
asm_cmpd(a, b);
return patch;
}
void Assembler::asm_condd(LIns *ins) {
LOpcode op = ins->opcode();
LIns *a = ins->oprnd1();
LIns *b = ins->oprnd2();
if (op == LIR_eqd) {
// result = ZF & !PF, must do logic on flags
// r = al|bl|cl|dl, can only use rh without rex prefix
@@ -1460,30 +1459,40 @@ namespace nanojit
X86_SETNP(r); // setnp rh rh = !PF
X86_SETE(r); // sete rl rl = ZF
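// Executed order is "sete rl; setnp rh"; combining them (done by code
// outside this hunk) yields ZF & !PF, which is 1 only for an ordered,
// equal compare -- an unordered result has PF=1 and so produces 0.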
} else {
if (op == LIR_ltd) {
op = LIR_gtd;
LIns *t = a; a = b; b = t;
} else if (op == LIR_led) {
op = LIR_ged;
LIns *t = a; a = b; b = t;
}
// LIR_ltd and LIR_gtd are handled by the same case because
// asm_cmpd() converts LIR_ltd(a,b) to LIR_gtd(b,a). Likewise for
// LIR_led/LIR_ged.
Register r = prepareResultReg(ins, GpRegs); // x64 can use any GPR as setcc target
MOVZX8(r, r);
if (op == LIR_gtd)
SETA(r);
else
SETAE(r);
switch (op) {
case LIR_ltd:
case LIR_gtd: SETA(r); break;
case LIR_led:
case LIR_ged: SETAE(r); break;
default: NanoAssert(0); break;
}
}
freeResourcesOf(ins);
asm_cmpd(a, b);
asm_cmpd(ins);
}
// WARNING: This function cannot generate any code that will affect the
// condition codes prior to the generation of the ucomisd. See asm_cmp()
// condition codes prior to the generation of the ucomisd. See asm_cmpi()
// for more details.
void Assembler::asm_cmpd(LIns *a, LIns *b) {
void Assembler::asm_cmpd(LIns *cond) {
LOpcode opcode = cond->opcode();
LIns* a = cond->oprnd1();
LIns* b = cond->oprnd2();
// First, we convert (a < b) into (b > a), and (a <= b) into (b >= a).
if (opcode == LIR_ltd) {
opcode = LIR_gtd;
LIns* t = a; a = b; b = t;
} else if (opcode == LIR_led) {
opcode = LIR_ged;
LIns* t = a; a = b; b = t;
}
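// The usual reason for the swap: after "ucomisd x, y" an unordered result
// sets ZF=PF=CF=1, so testing "less than" directly with JB/SETB would also
// succeed on NaN. The swapped gtd/ged forms are tested with JA/JAE (or
// SETA/SETAE), for which a NaN operand simply makes the condition false --
// the behaviour wanted for the LIR comparisons.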
Register ra, rb;
findRegFor2(FpRegs, a, ra, FpRegs, b, rb);
UCOMISD(ra, rb);
@@ -1518,7 +1527,7 @@ namespace nanojit
}
// WARNING: the code generated by this function must not affect the
// condition codes. See asm_cmp() for details.
// condition codes. See asm_cmpi() for details.
void Assembler::asm_restore(LIns *ins, Register r) {
if (ins->isop(LIR_allocp)) {
int d = arDisp(ins);
@@ -1587,7 +1596,7 @@ namespace nanojit
}
freeResourcesOf(ins);
asm_cmp(ins);
asm_cmpi(ins);
}
void Assembler::asm_ret(LIns *ins) {


@@ -423,9 +423,12 @@ namespace nanojit
void endLoadRegs(LIns *ins);\
void dis(NIns *p, int bytes);\
void asm_cmp(LIns*);\
void asm_cmp_imm(LIns*);\
void asm_cmpd(LIns*, LIns*);\
NIns* asm_branchd(bool, LIns*, NIns*);\
void asm_cmpi(LIns*);\
void asm_cmpi_imm(LIns*);\
void asm_cmpd(LIns*);\
NIns* asm_branch_helper(bool, LIns*, NIns*);\
NIns* asm_branchi_helper(bool, LIns*, NIns*);\
NIns* asm_branchd_helper(bool, LIns*, NIns*);\
void asm_div(LIns *ins);\
void asm_div_mod(LIns *ins);\
int max_stk_used;\


@@ -854,8 +854,6 @@ namespace nanojit
inline void Assembler::FLD1() { count_fpu(); FPUc(0xd9e8); asm_output("fld1"); fpu_push(); }
inline void Assembler::FLDZ() { count_fpu(); FPUc(0xd9ee); asm_output("fldz"); fpu_push(); }
inline void Assembler::FFREE(R r) { count_fpu(); FPU(0xddc0, r); asm_output("ffree %s",gpn(r)); }
inline void Assembler::FST32(bool p, I32 d, R b){ count_stq(); FPUm(0xd902|(p?1:0), d, b); asm_output("fst%s32 %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
inline void Assembler::FSTQ(bool p, I32 d, R b) { count_stq(); FPUm(0xdd02|(p?1:0), d, b); asm_output("fst%sq %d(%s)", (p?"p":""), d, gpn(b)); if (p) fpu_pop(); }
@@ -894,8 +892,6 @@ namespace nanojit
inline void Assembler::FMULdm( const double* dm) { count_ldq(); FPUdm(0xdc01, dm); asm_output("fmul (%p)", (void*)dm); }
inline void Assembler::FDIVRdm(const double* dm) { count_ldq(); FPUdm(0xdc07, dm); asm_output("fdivr (%p)", (void*)dm); }
inline void Assembler::FINCSTP() { count_fpu(); FPUc(0xd9f7); asm_output("fincstp"); fpu_pop(); }
inline void Assembler::FCOMP() { count_fpu(); FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();}
inline void Assembler::FCOMPP() { count_fpu(); FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();}
inline void Assembler::FLDr(R r) { count_ldq(); FPU(0xd9c0, r); asm_output("fld %s", gpn(r)); fpu_push(); }
@@ -1208,7 +1204,7 @@ namespace nanojit
}
// WARNING: the code generated by this function must not affect the
// condition codes. See asm_cmp().
// condition codes. See asm_cmpi().
void Assembler::asm_restore(LIns* ins, Register r)
{
NanoAssert(ins->getReg() == r);
@@ -1521,19 +1517,18 @@ namespace nanojit
}
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
NIns* Assembler::asm_branch_helper(bool branchOnFalse, LIns* cond, NIns* targ)
{
LOpcode condop = cond->opcode();
NanoAssert(cond->isCmp());
// Handle float conditions separately.
if (isCmpDOpcode(condop)) {
return asm_branchd(branchOnFalse, cond, targ);
}
return isCmpDOpcode(cond->opcode())
? asm_branchd_helper(branchOnFalse, cond, targ)
: asm_branchi_helper(branchOnFalse, cond, targ);
}
NIns* Assembler::asm_branchi_helper(bool branchOnFalse, LIns* cond, NIns* targ)
{
if (branchOnFalse) {
// op == LIR_xf/LIR_jf
switch (condop) {
switch (cond->opcode()) {
case LIR_eqi: JNE(targ); break;
case LIR_lti: JNL(targ); break;
case LIR_lei: JNLE(targ); break;
@@ -1547,7 +1542,7 @@ namespace nanojit
}
} else {
// op == LIR_xt/LIR_jt
switch (condop) {
switch (cond->opcode()) {
case LIR_eqi: JE(targ); break;
case LIR_lti: JL(targ); break;
case LIR_lei: JLE(targ); break;
@@ -1560,7 +1555,12 @@ namespace nanojit
default: NanoAssert(0); break;
}
}
NIns* at = _nIns;
return _nIns;
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
{
NIns* at = asm_branch_helper(branchOnFalse, cond, targ);
asm_cmp(cond);
return at;
}
@@ -1584,6 +1584,11 @@ namespace nanojit
JMP_indexed(indexreg, 2, table);
}
void Assembler::asm_cmp(LIns *cond)
{
isCmpDOpcode(cond->opcode()) ? asm_cmpd(cond) : asm_cmpi(cond);
}
// This generates a 'test' or 'cmp' instruction for a condition, which
// causes the condition codes to be set appropriately. It's used with
// conditional branches, conditional moves, and when generating
@@ -1623,7 +1628,7 @@ namespace nanojit
// asm_restore(), that means that asm_restore() cannot generate code which
// affects the condition codes.
//
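// A rough picture of why (registers and offsets purely illustrative):
// because code is assembled backwards, a restore emitted between the jcc
// and this test/cmp ends up between them when executed:
//   cmp  eax, ebx          ; emitted here by asm_cmpi()
//   mov  ecx, [ebp-8]      ; restore emitted in between -- must not alter flags
//   jne  target            ; emitted earlier, by asm_branchi_helper()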
void Assembler::asm_cmp(LIns *cond)
void Assembler::asm_cmpi(LIns *cond)
{
LIns* lhs = cond->oprnd1();
LIns* rhs = cond->oprnd2();
@@ -1734,7 +1739,7 @@ namespace nanojit
freeResourcesOf(ins);
asm_cmp(ins);
asm_cmpi(ins);
}
// Two example cases for "ins = add lhs, rhs". '*' lines are those
@@ -2051,11 +2056,10 @@ namespace nanojit
(ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
if (!_config.i386_sse2 && ins->isop(LIR_cmovd)) {
// See the SSE2 case below for an explanation of the subtleties here.
debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
NanoAssert(FST0 == rr);
NanoAssert(!iftrue->isInReg() || iftrue->getReg() == FST0);
NanoAssert(!iffalse->isInReg());
NanoAssert(!iftrue->isInReg() && !iffalse->isInReg());
NIns* target = _nIns;
@@ -2065,52 +2069,73 @@ namespace nanojit
int df = findMemFor(iffalse);
FLDQ(df, FP);
}
FSTP(FST0); // pop the stack
asm_branch_helper(false, condval, target);
FINCSTP();
// It's not sufficient to merely decrement the FP stack pointer; we have to
// also free FST0, otherwise the load above fails.
FFREE(FST0);
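// In the executed (forward) order this becomes "ffree st(0); fincstp".
// FINCSTP only moves the x87 TOP pointer and leaves the register's tag
// marked valid, so the FLDQ that runs afterwards would overflow the stack;
// freeing ST(0) first makes the pair behave like a plain "fstp st(0)".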
asm_branch(false, condval, target);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg())
findSpecificRegForUnallocated(iftrue, FST0);
asm_cmp(condval);
return;
}
RegisterMask allow = ins->isD() ? XmmRegs : GpRegs;
Register rr = prepareResultReg(ins, allow);
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
if (ins->isop(LIR_cmovd)) {
// The obvious way to handle this is as follows:
//
// mov rr, rt # only needed if rt is live afterwards
// do comparison
// jt end
// mov rr, rf
// end:
//
// The problem with this is that doing the comparison can cause
// registers to be evicted, possibly including 'rr', which holds
// 'ins'. And that screws things up. So instead we do this:
//
// do comparison
// mov rr, rt # only needed if rt is live afterwards
// jt end
// mov rr, rf
// end:
//
// Putting the 'mov' between the comparison and the jump is ok
// because move instructions don't modify the condition codes.
//
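// With SSE2 that comes out roughly as follows (register names and the
// condition are purely illustrative):
//   ucomisd xmm2, xmm3     ; comparison, emitted last by asm_cmp(condval)
//   movsd   xmm0, xmm1     ; rr := rt (skipped when rr == rt)
//   ja      end            ; from asm_branch_helper()
//   movsd   xmm0, xmm4     ; rr := rf
// end: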
NIns* target = _nIns;
asm_nongp_copy(rr, rf);
asm_branch(false, condval, target);
asm_branch_helper(false, condval, target);
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
if (rr != rt)
asm_nongp_copy(rr, rt);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
findSpecificRegForUnallocated(iftrue, rr);
}
asm_cmp(condval);
return;
}
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
NanoAssert(ins->isop(LIR_cmovi));
// WARNING: We cannot generate any code that affects the condition
// codes between the MRcc generation here and the asm_cmp() call
// below. See asm_cmp() for more details.
// codes between the MRcc generation here and the asm_cmpi() call
// below. See asm_cmpi() for more details.
switch (condval->opcode()) {
// Note that these are all opposites...
case LIR_eqi: MRNE(rr, rf); break;
@@ -2128,6 +2153,7 @@ namespace nanojit
if (rr != rt)
MR(rr, rt);
NanoAssert(ins->getReg() == rr);
freeResourcesOf(ins);
if (!iftrue->isInReg()) {
NanoAssert(rt == rr);
@@ -2614,7 +2640,7 @@ namespace nanojit
}
}
NIns* Assembler::asm_branchd(bool branchOnFalse, LIns *cond, NIns *targ)
NIns* Assembler::asm_branchd_helper(bool branchOnFalse, LIns* cond, NIns *targ)
{
NIns* at = 0;
LOpcode opcode = cond->opcode();
@@ -2673,14 +2699,13 @@ namespace nanojit
if (!at)
at = _nIns;
asm_cmpd(cond);
return at;
}
// WARNING: This function cannot generate any code that will affect the
// condition codes prior to the generation of the
// ucomisd/fcompp/fcmop/fcom. See asm_cmp() for more details.
// ucomisd/fcompp/fcomp/fcom. See asm_cmpi() for more details.
void Assembler::asm_cmpd(LIns *cond)
{
LOpcode condop = cond->opcode();
@@ -2699,14 +2724,13 @@ namespace nanojit
LIns* t = lhs; lhs = rhs; rhs = t;
}
// LIR_eqd, if lhs == rhs:
// ucomisd ZPC outcome (SETNP/JNP succeeds if P==0)
// ------- --- -------
// UNORDERED 111 SETNP/JNP fails
// EQUAL 100 SETNP/JNP succeeds
//
// LIR_eqd, if lsh != rhs;
// LIR_eqd, if lhs != rhs;
// ucomisd ZPC outcome (SETP/JP succeeds if P==1,
// SETE/JE succeeds if Z==1)
// ------- --- -------
@@ -2810,13 +2834,10 @@ namespace nanojit
} else {
TEST_AH(mask);
FNSTSW_AX(); // requires rEAX to be free
if (rhs->isImmD())
{
if (rhs->isImmD()) {
const uint64_t* p = findImmDFromPool(rhs->immDasQ());
FCOMdm(pop, (const double*)p);
}
else
{
} else {
int d = findMemFor(rhs);
FCOM(pop, d, FP);
}


@@ -199,9 +199,12 @@ namespace nanojit
void asm_farg(LIns*, int32_t& stkd);\
void asm_arg(ArgType ty, LIns* p, Register r, int32_t& stkd);\
void asm_pusharg(LIns*);\
void asm_cmpd(LIns *cond);\
NIns* asm_branchd(bool, LIns*, NIns*);\
void asm_cmp(LIns *cond); \
void asm_cmpi(LIns *cond); \
void asm_cmpd(LIns *cond);\
NIns* asm_branch_helper(bool, LIns* cond, NIns*);\
NIns* asm_branchi_helper(bool, LIns* cond, NIns*);\
NIns* asm_branchd_helper(bool, LIns* cond, NIns*);\
void asm_div_mod(LIns *cond); \
void asm_load(int d, Register r); \
void asm_immd(Register r, uint64_t q, double d, bool canClobberCCs); \
@@ -429,7 +432,6 @@ namespace nanojit
void FCHS(); \
void FLD1(); \
void FLDZ(); \
void FFREE(Register r); \
void FST32(bool p, int32_t d, Register b); \
void FSTQ(bool p, int32_t d, Register b); \
void FSTPQ(int32_t d, Register b); \
@@ -451,7 +453,6 @@ namespace nanojit
void FSUBRdm(const double* dm); \
void FMULdm( const double* dm); \
void FDIVRdm(const double* dm); \
void FINCSTP(); \
void FSTP(Register r) { \
count_fpu(); \
FPU(0xddd8, r); \