mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-12 21:05:36 +00:00
Bug 898468 - IonMonkey: Micro-optimize floating-point min and max using x86's minsd and maxsd instructions.
This commit is contained in:
parent
8e003bad6f
commit
3335f3af24
@ -294,7 +294,9 @@ private:
|
||||
OP2_CVTSS2SD_VsdEd = 0x5A,
|
||||
OP2_CVTSD2SS_VsdEd = 0x5A,
|
||||
OP2_SUBSD_VsdWsd = 0x5C,
|
||||
OP2_MINSD_VsdWsd = 0x5D,
|
||||
OP2_DIVSD_VsdWsd = 0x5E,
|
||||
OP2_MAXSD_VsdWsd = 0x5F,
|
||||
OP2_SQRTSD_VsdWsd = 0x51,
|
||||
OP2_ANDPD_VpdWpd = 0x54,
|
||||
OP2_ORPD_VpdWpd = 0x56,
|
||||
@ -2603,6 +2605,38 @@ public:
|
||||
m_formatter.immediate8(0x01); // the $1
|
||||
}
|
||||
|
||||
// Register-to-register MINSD. Logs the instruction through spew(), then
// emits the PRE_SSE_F2 prefix followed by the two-byte opcode
// OP2_MINSD_VsdWsd with dst and src passed as the register operands.
void minsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
    const RegisterID dstReg = static_cast<RegisterID>(dst);
    const RegisterID srcReg = static_cast<RegisterID>(src);

    spew("minsd %s, %s", nameFPReg(src), nameFPReg(dst));
    m_formatter.prefix(PRE_SSE_F2);
    m_formatter.twoByteOp(OP2_MINSD_VsdWsd, dstReg, srcReg);
}
|
||||
|
||||
void minsd_mr(int offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
spew("minsd %s0x%x(%s), %s",
|
||||
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_F2);
|
||||
m_formatter.twoByteOp(OP2_MINSD_VsdWsd, (RegisterID)dst, base, offset);
|
||||
}
|
||||
|
||||
// Register-to-register MAXSD. Logs the instruction through spew(), then
// emits the PRE_SSE_F2 prefix followed by the two-byte opcode
// OP2_MAXSD_VsdWsd with dst and src passed as the register operands.
void maxsd_rr(XMMRegisterID src, XMMRegisterID dst)
{
    const RegisterID dstReg = static_cast<RegisterID>(dst);
    const RegisterID srcReg = static_cast<RegisterID>(src);

    spew("maxsd %s, %s", nameFPReg(src), nameFPReg(dst));
    m_formatter.prefix(PRE_SSE_F2);
    m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, dstReg, srcReg);
}
|
||||
|
||||
void maxsd_mr(int offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
spew("maxsd %s0x%x(%s), %s",
|
||||
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_F2);
|
||||
m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, (RegisterID)dst, base, offset);
|
||||
}
|
||||
|
||||
// Misc instructions:
|
||||
|
||||
void int3()
|
||||
|
@ -1264,6 +1264,40 @@ class AssemblerX86Shared
|
||||
JS_ASSERT(HasSSE41());
|
||||
masm.roundsd_rr(src.code(), dest.code(), mode);
|
||||
}
|
||||
// minsd between two float registers: asserts that SSE2 is available and
// forwards both register codes to masm.minsd_rr().
void minsd(const FloatRegister &src, const FloatRegister &dest)
{
    JS_ASSERT(HasSSE2());
    masm.minsd_rr(src.code(), dest.code());
}
|
||||
void minsd(const Operand &src, const FloatRegister &dest) {
|
||||
JS_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.minsd_rr(src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::REG_DISP:
|
||||
masm.minsd_mr(src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// maxsd between two float registers: asserts that SSE2 is available and
// forwards both register codes to masm.maxsd_rr().
void maxsd(const FloatRegister &src, const FloatRegister &dest)
{
    JS_ASSERT(HasSSE2());
    masm.maxsd_rr(src.code(), dest.code());
}
|
||||
void maxsd(const Operand &src, const FloatRegister &dest) {
|
||||
JS_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.maxsd_rr(src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::REG_DISP:
|
||||
masm.maxsd_mr(src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void fisttp(const Operand &dest) {
|
||||
JS_ASSERT(HasSSE3());
|
||||
switch (dest.kind()) {
|
||||
|
@ -379,35 +379,40 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
|
||||
|
||||
JS_ASSERT(first == output);
|
||||
|
||||
Assembler::Condition cond = ins->mir()->isMax()
|
||||
? Assembler::Above
|
||||
: Assembler::Below;
|
||||
Label nan, equal, returnSecond, done;
|
||||
Label done, nan, minMaxInst;
|
||||
|
||||
masm.ucomisd(second, first);
|
||||
masm.j(Assembler::Parity, &nan); // first or second is NaN, result is NaN.
|
||||
masm.j(Assembler::Equal, &equal); // make sure we handle -0 and 0 right.
|
||||
masm.j(cond, &returnSecond);
|
||||
masm.jmp(&done);
|
||||
// Do a ucomisd to catch equality and NaNs, which both require special
|
||||
// handling. If the operands are ordered and unequal, we branch straight to
|
||||
// the min/max instruction. If we wanted, we could also branch for less-than
|
||||
// or greater-than here instead of using min/max, however these conditions
|
||||
// will sometimes be hard on the branch predictor.
|
||||
masm.ucomisd(first, second);
|
||||
masm.j(Assembler::NotEqual, &minMaxInst);
|
||||
masm.j(Assembler::Parity, &nan);
|
||||
|
||||
// Check for zero.
|
||||
masm.bind(&equal);
|
||||
masm.xorpd(ScratchFloatReg, ScratchFloatReg);
|
||||
masm.ucomisd(first, ScratchFloatReg);
|
||||
masm.j(Assembler::NotEqual, &done); // first wasn't 0 or -0, so just return it.
|
||||
// So now both operands are either -0 or 0.
|
||||
// Ordered and equal. The operands are bit-identical unless they are zero
|
||||
// and one is negative zero. These instructions merge the sign bits in that
|
||||
// case, and are no-ops otherwise.
|
||||
if (ins->mir()->isMax())
|
||||
masm.addsd(second, first); // -0 + -0 = -0 and -0 + 0 = 0.
|
||||
masm.andpd(second, first);
|
||||
else
|
||||
masm.orpd(second, first); // This just ors the sign bit.
|
||||
masm.jmp(&done);
|
||||
masm.orpd(second, first);
|
||||
masm.jump(&done);
|
||||
|
||||
// x86's min/max are not symmetric; if either operand is a NaN, they return
|
||||
// the read-only operand. We need to return a NaN if either operand is a
|
||||
// NaN, so we explicitly check for a NaN in the read-write operand.
|
||||
masm.bind(&nan);
|
||||
masm.loadStaticDouble(&js_NaN, output);
|
||||
masm.jmp(&done);
|
||||
masm.ucomisd(first, first);
|
||||
masm.j(Assembler::Parity, &done);
|
||||
|
||||
masm.bind(&returnSecond);
|
||||
masm.movsd(second, output);
|
||||
// When the values are unequal, or second is NaN, x86's min and max will
|
||||
// return the value we need.
|
||||
masm.bind(&minMaxInst);
|
||||
if (ins->mir()->isMax())
|
||||
masm.maxsd(second, first);
|
||||
else
|
||||
masm.minsd(second, first);
|
||||
|
||||
masm.bind(&done);
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user