Make LIR_ov work for LIR_mul on ARM. (bug 521161, r=gal)

Jacob Bramley 2009-11-02 09:35:01 +00:00
parent ad303a4bd9
commit 146c5ecf6b
3 changed files with 96 additions and 19 deletions


@@ -8093,13 +8093,11 @@ TraceRecorder::alu(LOpcode v, jsdouble v0, jsdouble v1, LIns* s0, LIns* s1)
       case LIR_fsub:
         r = v0 - v1;
         break;
-#if !defined NANOJIT_ARM
       case LIR_fmul:
         r = v0 * v1;
         if (r == 0.0)
             goto out;
         break;
-#endif
 #if defined NANOJIT_IA32 || defined NANOJIT_X64
       case LIR_fdiv:
         if (v1 == 0)


@@ -1857,6 +1857,14 @@ Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
     // Detect whether or not this is a floating-point comparison.
     bool fp_cond;
 
+    // Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
+    // to detect overflow on multiply. Thus, if cond points to a LIR_ov which
+    // in turn points to a LIR_mul, we must be conditional on !Z, not V.
+    if ((condop == LIR_ov) && (cond->oprnd1()->isop(LIR_mul))) {
+        condop = LIR_eq;
+        branchOnFalse = !branchOnFalse;
+    }
+
     // Select the appropriate ARM condition code to match the LIR instruction.
     switch (condop)
     {
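The !Z trick works because SMULL produces the full 64-bit product: a signed 32x32 multiply overflowed exactly when the product's high word is not the sign-extension of its low word, which is what CMP ip, rr, ASR #31 tests. A minimal C++ sketch of the predicate the emitted pair leaves in the Z flag (illustrative only, not nanojit code; assumes the usual arithmetic right shift on signed values):

    #include <cstdint>

    // Z is set exactly when the signed 32x32 multiply did NOT overflow.
    bool smull_cmp_sets_z(int32_t a, int32_t b)
    {
        int64_t p  = (int64_t)a * (int64_t)b;
        int32_t lo = (int32_t)p;          // SMULL's low word  (rr)
        int32_t hi = (int32_t)(p >> 32);  // SMULL's high word (ip)
        // ASR #31 smears bit 31 of lo across the whole word, so equality
        // holds only when hi is pure sign-extension of lo, i.e. the
        // product fits in 32 signed bits.
        return hi == (lo >> 31);
    }

Hence LIR_ov over a LIR_mul becomes a test for Z clear, and asm_branch rewrites the condition to LIR_eq with branchOnFalse inverted.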
@@ -1991,10 +1999,11 @@ void
 Assembler::asm_cond(LInsp ins)
 {
     Register r = prepResultReg(ins, AllowableFlagRegs);
-    switch(ins->opcode())
+    LOpcode op = ins->opcode();
+    switch(op)
     {
     case LIR_eq: SETEQ(r); break;
-    case LIR_ov: SETVS(r); break;
     case LIR_lt: SETLT(r); break;
     case LIR_le: SETLE(r); break;
     case LIR_gt: SETGT(r); break;
@@ -2003,6 +2012,17 @@ Assembler::asm_cond(LInsp ins)
     case LIR_ule: SETLS(r); break;
     case LIR_ugt: SETHI(r); break;
     case LIR_uge: SETHS(r); break;
+    case LIR_ov:
+        // Because MUL can't set the V flag, we use SMULL and CMP to set
+        // the Z flag to detect overflow on multiply. Thus, if ins points
+        // to a LIR_ov which in turn points to a LIR_mul, we must be
+        // conditional on !Z, not V.
+        if (!ins->oprnd1()->isop(LIR_mul)) {
+            SETVS(r);
+        } else {
+            SETNE(r);
+        }
+        break;
     default: NanoAssert(0); break;
     }
     asm_cmp(ins);
@@ -2106,12 +2126,19 @@ Assembler::asm_arith(LInsp ins)
             //
             // We try to use rb as the first operand by default because it is
             // common for (rr == ra) and is thus likely to be the most
-            // efficient case; if ra is no longer used after this LIR
-            // instruction, it is re-used for the result register (rr).
+            // efficient method.
             if ((ARM_ARCH > 5) || (rr != rb)) {
                 // Newer cores place no restrictions on the registers used in a
                 // MUL instruction (compared to other arithmetic instructions).
-                MUL(rr, rb, ra);
+                // IP is used to temporarily store the high word of the result from
+                // SMULL, so we make use of this to perform an overflow check, as
+                // ARM's MUL instruction can't set the overflow flag by itself.
+                // We can check for overflow using the following:
+                //   SMULL  rr, ip, ra, rb
+                //   CMP    ip, rr, ASR #31
+                // An explanation can be found in bug 521161. This sets Z if we did
+                // _not_ overflow, and clears it if we did.
+                ALUr_shi(AL, cmp, 1, IP, IP, rr, ASR_imm, 31);
+                SMULL(rr, IP, rb, ra);
             } else {
                 // ARM_ARCH is ARMv5 (or below) and rr == rb, so we must
                 // find a different way to encode the instruction.
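Note the call order here: the ARM back end emits instructions backwards (each macro writes through *(--_nIns), as the SMULL definition below shows), so although ALUr_shi appears first in the source, the generated code executes SMULL first and the CMP second. A quick sanity check of the Z-based test on concrete values (a sketch under the same assumptions as above, not nanojit code):

    #include <cassert>
    #include <cstdint>

    // Z set <=> no overflow: high word equals sign-extension of low word.
    static bool z_after_smull_cmp(int32_t a, int32_t b)
    {
        int64_t p = (int64_t)a * (int64_t)b;
        return (int32_t)(p >> 32) == ((int32_t)p >> 31);
    }

    int main()
    {
        assert( z_after_smull_cmp(46340, 46340));     // 2147395600 fits
        assert(!z_after_smull_cmp(46341, 46341));     // 2147488281 overflows
        assert( z_after_smull_cmp(-46341, 46340));    // negative products fit too
        assert(!z_after_smull_cmp(0x10000, 0x10000)); // 2^32 overflows
        return 0;
    }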
@@ -2120,19 +2147,40 @@ Assembler::asm_arith(LInsp ins)
                 if (rr != ra) {
                     // We know that rr == rb, so this will be something like
                     // rX = rY * rX.
-                    MUL(rr, ra, rb);
+                    // Other than swapping ra and rb, this works in the same
+                    // way as the ARMv6+ case, above.
+                    ALUr_shi(AL, cmp, 1, IP, IP, rr, ASR_imm, 31);
+                    SMULL(rr, IP, ra, rb);
                 } else {
-                    // We're trying to do rX = rX * rX, so we must use a
-                    // temporary register to achieve this correctly on ARMv5.
+                    // We're trying to do rX = rX * rX, but we also need to
+                    // check for overflow so we would need two extra registers
+                    // on ARMv5 and below. We achieve this by observing the
+                    // following:
+                    //  - abs(rX)*abs(rX) = rX*rX, so we force the input to be
+                    //    positive to simplify the detection logic.
+                    //  - Any argument greater than 0xffff will _always_
+                    //    overflow, and we can easily check that the top 16
+                    //    bits are zero.
+                    //  - Any argument lower than (or equal to) 0xffff that
+                    //    also overflows is guaranteed to set output bit 31.
+                    //
+                    // Thus, we know we have _not_ overflowed if:
+                    //   abs(rX)&0xffff0000 == 0  AND  result[31] == 0
+                    //
+                    // The following instruction sequence will be emitted:
+                    //   MOVS   IP, rX          // Put abs(rX) into IP.
+                    //   RSBMI  IP, IP, #0      // ...
+                    //   MUL    rX, IP, IP      // Do the actual multiplication.
+                    //   MOVS   IP, IP, LSR #16 // Check that abs(arg) <= 0xffff
+                    //   CMPEQ  IP, rX, ASR #31 // Check that result[31] == 0
+
                     // The register allocator will never allocate IP so it will
                     // be safe to use here.
                     NanoAssert(ra != IP);
+                    NanoAssert(rr != IP);
+
                     // In this case, rr == ra == rb.
-                    MUL(rr, IP, rb);
-                    MOV(IP, ra);
+                    ALUr_shi(AL, cmp, 1, IP, rr, rr, ASR_imm, 31);
+                    ALUr_shi(AL, mov, 1, IP, IP, IP, LSR_imm, 16);
+                    MUL(rr, IP, IP);
+                    ALUi(MI, rsb, 0, IP, IP, 0);
+                    ALUr(AL, mov, 1, IP, ra, ra);
                 }
             }
             break;
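The three observations above are easy to validate. A C++ sketch of the logic this five-instruction sequence implements, with the MUL modelled as the hardware does it, modulo 2^32 (illustrative names, not nanojit code):

    #include <cassert>
    #include <cstdint>

    // Z ends up set exactly when rX = rX * rX did NOT overflow.
    static bool square_leaves_z_set(int32_t x)
    {
        uint32_t ip = (x < 0) ? 0u - (uint32_t)x : (uint32_t)x; // MOVS / RSBMI
        uint32_t rr = ip * ip;                                  // MUL, mod 2^32
        bool arg_small   = (ip >> 16) == 0; // MOVS IP, IP, LSR #16 sets Z
        bool bit31_clear = (rr >> 31) == 0; // CMPEQ IP, rX, ASR #31 keeps Z
        return arg_small && bit31_clear;
    }

    int main()
    {
        assert( square_leaves_z_set(46340));   // 46340^2 = 2147395600 fits
        assert(!square_leaves_z_set(46341));   // 46341^2 sets bit 31
        assert(!square_leaves_z_set(0x10000)); // abs > 0xffff always overflows
        assert( square_leaves_z_set(-3));      // sign is stripped first
        return 0;
    }

Note that 0xffff * 0xffff = 0xfffe0001 has bit 31 set, so the boundary case is still caught by the second test.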
@@ -2229,7 +2277,6 @@ Assembler::asm_cmov(LInsp ins)
     switch (condval->opcode()) {
         // note that these are all opposites...
         case LIR_eq: MOVNE(rr, iffalsereg); break;
-        case LIR_ov: MOVVC(rr, iffalsereg); break;
         case LIR_lt: MOVGE(rr, iffalsereg); break;
         case LIR_le: MOVGT(rr, iffalsereg); break;
         case LIR_gt: MOVLE(rr, iffalsereg); break;
@@ -2238,6 +2285,17 @@ Assembler::asm_cmov(LInsp ins)
         case LIR_ule: MOVHI(rr, iffalsereg); break;
         case LIR_ugt: MOVLS(rr, iffalsereg); break;
         case LIR_uge: MOVLO(rr, iffalsereg); break;
+        case LIR_ov:
+            // Because MUL can't set the V flag, we use SMULL and CMP to set
+            // the Z flag to detect overflow on multiply. Thus, if ins points
+            // to a LIR_ov which in turn points to a LIR_mul, we must be
+            // conditional on !Z, not V.
+            if (!condval->oprnd1()->isop(LIR_mul)) {
+                MOVVC(rr, iffalsereg);
+            } else {
+                MOVEQ(rr, iffalsereg);
+            }
+            break;
         default: debug_only( NanoAssert(0) ); break;
     }
     /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
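There is a double negation to keep straight here: asm_cmov writes iffalsereg on the opposite of the condition, and for a LIR_ov over a LIR_mul the condition "overflowed" is Z clear, so its opposite is Z set, hence MOVEQ. A small truth-table sketch (illustrative only, not nanojit code):

    #include <cstdio>

    int main()
    {
        // After SMULL/CMP: Z set <=> no overflow <=> LIR_ov is false.
        for (int z = 0; z <= 1; ++z) {
            bool overflowed  = (z == 0);
            bool moveq_fires = (z == 1); // MOVEQ executes only when Z is set
            printf("Z=%d  LIR_ov=%d  ->  result = %s\n",
                   z, (int)overflowed,
                   moveq_fires ? "iffalse (MOVEQ fired)" : "iftrue");
        }
        return 0;
    }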


@@ -477,6 +477,26 @@ enum {
 // Other operations.
 // --------
 
+// [_d_hi,_d] = _l * _r
+#define SMULL_dont_check_op1(_d, _d_hi, _l, _r) do {                        \
+        underrunProtect(4);                                                 \
+        NanoAssert((ARM_ARCH >= 6) || ((_d) != (_l)));                      \
+        NanoAssert(IsGpReg(_d) && IsGpReg(_d_hi) && IsGpReg(_l) && IsGpReg(_r)); \
+        NanoAssert(((_d) != PC) && ((_d_hi) != PC) && ((_l) != PC) && ((_r) != PC)); \
+        *(--_nIns) = (NIns)( COND_AL | 0xc00090 | (_d_hi)<<16 | (_d)<<12 | (_r)<<8 | (_l) ); \
+        asm_output("smull %s, %s, %s, %s", gpn(_d), gpn(_d_hi), gpn(_l), gpn(_r)); \
+    } while(0)
+
+#if NJ_ARM_ARCH >= NJ_ARM_V6
+#define SMULL(_d, _d_hi, _l, _r) SMULL_dont_check_op1(_d, _d_hi, _l, _r)
+#else
+#define SMULL(_d, _d_hi, _l, _r) do {                                       \
+        NanoAssert(   (_d) != (_l));                                        \
+        NanoAssert((_d_hi) != (_l));                                        \
+        SMULL_dont_check_op1(_d, _d_hi, _l, _r);                            \
+    } while(0)
+#endif
+
 // _d = _l * _r
 #define MUL_dont_check_op1(_d, _l, _r) do {                                 \
         underrunProtect(4);                                                 \
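In the SMULL encoding, 0xc00090 supplies the fixed opcode bits: 110 in bits 23:21 selects SMULL among the long multiplies, and 1001 in bits 7:4 is the multiply signature. A hand-assembly sketch to eyeball the field layout (register numbers are arbitrary examples, not taken from nanojit):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // cond | 0000110S | RdHi<<16 | RdLo<<12 | Rs<<8 | 1001 | Rm
        const uint32_t COND_AL = 0xE0000000; // the "always" condition
        uint32_t d = 0, d_hi = 12, l = 1, r = 2; // smull r0, ip, r1, r2
        uint32_t insn = COND_AL | 0xc00090
                      | (d_hi << 16) | (d << 12) | (r << 8) | l;
        printf("0x%08X\n", insn); // prints 0xE0CC0291
        return 0;
    }

The extra asserts in the ARMv5 variant encode the architectural restriction that RdLo (and RdHi) must differ from Rm on pre-v6 cores; ARMv6 lifted it, which is why SMULL_dont_check_op1 is used directly there.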
@@ -727,6 +747,7 @@ enum {
     } while (0)
 
 #define SETEQ(r) SET(r,EQ)
+#define SETNE(r) SET(r,NE)
 #define SETLT(r) SET(r,LT)
 #define SETLE(r) SET(r,LE)
 #define SETGT(r) SET(r,GT)