Bug 898468 - IonMonkey: Micro-optimize floating-point min and max using x86's minsd and maxsd instructions.

This commit is contained in:
Dan Gohman 2013-08-01 13:34:48 -07:00
parent 8e003bad6f
commit 3335f3af24
3 changed files with 95 additions and 22 deletions

View File

@ -294,7 +294,9 @@ private:
OP2_CVTSS2SD_VsdEd = 0x5A,
OP2_CVTSD2SS_VsdEd = 0x5A,
OP2_SUBSD_VsdWsd = 0x5C,
OP2_MINSD_VsdWsd = 0x5D,
OP2_DIVSD_VsdWsd = 0x5E,
OP2_MAXSD_VsdWsd = 0x5F,
OP2_SQRTSD_VsdWsd = 0x51,
OP2_ANDPD_VpdWpd = 0x54,
OP2_ORPD_VpdWpd = 0x56,
@ -2603,6 +2605,38 @@ public:
m_formatter.immediate8(0x01); // the $1
}
// Emit a register-to-register MINSD (scalar double-precision minimum).
// |src| and |dst| are both XMM registers; the instruction is logged in
// "src, dst" operand order before it is encoded.
void minsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
    const RegisterID dstReg = (RegisterID)dst;
    const RegisterID srcReg = (RegisterID)src;

    spew("minsd %s, %s", nameFPReg(src), nameFPReg(dst));

    // Prefix byte first, then the two-byte opcode with its register operands.
    m_formatter.prefix(PRE_SSE_F2);
    m_formatter.twoByteOp(OP2_MINSD_VsdWsd, dstReg, srcReg);
}
void minsd_mr(int offset, RegisterID base, XMMRegisterID dst)
{
spew("minsd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MINSD_VsdWsd, (RegisterID)dst, base, offset);
}
// Emit a register-to-register MAXSD (scalar double-precision maximum).
// |src| and |dst| are both XMM registers; the instruction is logged in
// "src, dst" operand order before it is encoded.
void maxsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
    const RegisterID dstReg = (RegisterID)dst;
    const RegisterID srcReg = (RegisterID)src;

    spew("maxsd %s, %s", nameFPReg(src), nameFPReg(dst));

    // Prefix byte first, then the two-byte opcode with its register operands.
    m_formatter.prefix(PRE_SSE_F2);
    m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, dstReg, srcReg);
}
void maxsd_mr(int offset, RegisterID base, XMMRegisterID dst)
{
spew("maxsd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, (RegisterID)dst, base, offset);
}
// Misc instructions:
void int3()

View File

@ -1264,6 +1264,40 @@ class AssemblerX86Shared
JS_ASSERT(HasSSE41());
masm.roundsd_rr(src.code(), dest.code(), mode);
}
// Emit a register-to-register minsd taking |src| and |dest| as float
// registers. Asserts that SSE2 is available, then forwards the two register
// codes to the low-level assembler.
void minsd(const FloatRegister &src, const FloatRegister &dest) {
JS_ASSERT(HasSSE2());
masm.minsd_rr(src.code(), dest.code());
}
// Emit minsd with a generic source operand. Asserts that SSE2 is available.
// Only two operand kinds are handled: a float register (register form) and
// a base register plus displacement (memory form). Any other kind is
// treated as unreachable.
void minsd(const Operand &src, const FloatRegister &dest) {
    JS_ASSERT(HasSSE2());
    switch (src.kind()) {
      case Operand::REG_DISP:
        masm.minsd_mr(src.disp(), src.base(), dest.code());
        return;
      case Operand::FPREG:
        masm.minsd_rr(src.fpu(), dest.code());
        return;
      default:
        break;
    }
    MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
}
// Emit a register-to-register maxsd taking |src| and |dest| as float
// registers. Asserts that SSE2 is available, then forwards the two register
// codes to the low-level assembler.
void maxsd(const FloatRegister &src, const FloatRegister &dest) {
JS_ASSERT(HasSSE2());
masm.maxsd_rr(src.code(), dest.code());
}
// Emit maxsd with a generic source operand. Asserts that SSE2 is available.
// Only two operand kinds are handled: a float register (register form) and
// a base register plus displacement (memory form). Any other kind is
// treated as unreachable.
void maxsd(const Operand &src, const FloatRegister &dest) {
    JS_ASSERT(HasSSE2());
    switch (src.kind()) {
      case Operand::REG_DISP:
        masm.maxsd_mr(src.disp(), src.base(), dest.code());
        return;
      case Operand::FPREG:
        masm.maxsd_rr(src.fpu(), dest.code());
        return;
      default:
        break;
    }
    MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
}
void fisttp(const Operand &dest) {
JS_ASSERT(HasSSE3());
switch (dest.kind()) {

View File

@ -379,35 +379,40 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
JS_ASSERT(first == output);
Assembler::Condition cond = ins->mir()->isMax()
? Assembler::Above
: Assembler::Below;
Label nan, equal, returnSecond, done;
Label done, nan, minMaxInst;
masm.ucomisd(second, first);
masm.j(Assembler::Parity, &nan); // first or second is NaN, result is NaN.
masm.j(Assembler::Equal, &equal); // make sure we handle -0 and 0 right.
masm.j(cond, &returnSecond);
masm.jmp(&done);
// Do a ucomisd to catch equality and NaNs, which both require special
// handling. If the operands are ordered and inequal, we branch straight to
// the min/max instruction. If we wanted, we could also branch for less-than
// or greater-than here instead of using min/max, however these conditions
// will sometimes be hard on the branch predictor.
masm.ucomisd(first, second);
masm.j(Assembler::NotEqual, &minMaxInst);
masm.j(Assembler::Parity, &nan);
// Check for zero.
masm.bind(&equal);
masm.xorpd(ScratchFloatReg, ScratchFloatReg);
masm.ucomisd(first, ScratchFloatReg);
masm.j(Assembler::NotEqual, &done); // first wasn't 0 or -0, so just return it.
// So now both operands are either -0 or 0.
// Ordered and equal. The operands are bit-identical unless one is zero
// and the other is negative zero. These instructions merge the sign bits
// in that case, and are no-ops otherwise.
if (ins->mir()->isMax())
masm.addsd(second, first); // -0 + -0 = -0 and -0 + 0 = 0.
masm.andpd(second, first);
else
masm.orpd(second, first); // This just ors the sign bit.
masm.jmp(&done);
masm.orpd(second, first);
masm.jump(&done);
// x86's min/max are not symmetric; if either operand is a NaN, they return
// the read-only operand. We need to return a NaN if either operand is a
// NaN, so we explicitly check for a NaN in the read-write operand.
masm.bind(&nan);
masm.loadStaticDouble(&js_NaN, output);
masm.jmp(&done);
masm.ucomisd(first, first);
masm.j(Assembler::Parity, &done);
masm.bind(&returnSecond);
masm.movsd(second, output);
// When the values are inequal, or second is NaN, x86's min and max will
// return the value we need.
masm.bind(&minMaxInst);
if (ins->mir()->isMax())
masm.maxsd(second, first);
else
masm.minsd(second, first);
masm.bind(&done);
return true;