Bug 1065339 - IonMonkey: Use vblendvps for SIMD minNum/maxNum r=jandem

This commit is contained in:
Dan Gohman 2014-12-08 18:20:30 -08:00
parent 042838584e
commit 1dffcd018f
3 changed files with 78 additions and 12 deletions

View File

@ -2234,6 +2234,23 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
// Emits vblendvps with every operand in an FP register. SSE4.1 support is
// asserted; the four registers are handed to the base assembler as raw
// register codes, in the same order they were received.
void vblendvps(FloatRegister mask, FloatRegister src1, FloatRegister src0, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE41());
    masm.vblendvps_rr(mask.code(),
                      src1.code(),
                      src0.code(),
                      dest.code());
}
// Emits vblendvps where src1 is a generic operand: either an FP register or
// a base+displacement memory reference. Any other operand kind crashes.
// SSE4.1 support is asserted.
void vblendvps(FloatRegister mask, const Operand &src1, FloatRegister src0, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE41());

    // Register source: use the register-register encoder.
    if (src1.kind() == Operand::FPREG) {
        masm.vblendvps_rr(mask.code(), src1.fpu(), src0.code(), dest.code());
        return;
    }

    // Memory source addressed as disp(base): use the memory-register encoder.
    if (src1.kind() == Operand::MEM_REG_DISP) {
        masm.vblendvps_mr(mask.code(), src1.disp(), src1.base(), src0.code(), dest.code());
        return;
    }

    MOZ_CRASH("unexpected operand kind");
}
void movsldup(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE3());
masm.movsldup_rr(src.code(), dest.code());

View File

@ -3788,6 +3788,9 @@ public:
// Register-register vblendvps: forwards the operands unchanged to the
// register overload of vblendvOpSimd.
void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
    vblendvOpSimd(mask, src1, src0, dst);
}
// Memory-register vblendvps: src1 is the memory operand offset(base).
// Forwards the operands unchanged to the memory overload of vblendvOpSimd.
void vblendvps_mr(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
    vblendvOpSimd(mask, offset, base, src0, dst);
}
void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
{
@ -4397,6 +4400,25 @@ private:
mask, (RegisterID)rm, src0, dst);
}
// Emits blendvps/vblendvps whose src1 operand is the memory location
// offset(base).
//
// If useLegacySSEEncodingForVblendv() accepts the operands, the non-VEX
// "blendvps" encoding is emitted, which names only the memory source and a
// single XMM register. Otherwise the four-operand VEX form is emitted through
// vblendvOpVex.
void vblendvOpSimd(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
    if (useLegacySSEEncodingForVblendv(mask, src0, dst)) {
        // The legacy form is destructive: its one register operand is the
        // register that receives the result, so name dst explicitly in both
        // the spew and the encoding instead of src0.
        // NOTE(review): presumably the legacy check only succeeds when
        // src0 == dst, in which case this changes no emitted bytes -- confirm.
        spew("blendvps %s0x%x(%s), %s",
             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
        // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
        m_formatter.legacySSEPrefix(VEX_PD);
        m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_BLENDVPS, offset, base, dst);
        return;
    }

    spew("vblendvps %s, %s0x%x(%s), %s, %s",
         nameFPReg(mask), PRETTY_PRINT_OFFSET(offset), nameIReg(base),
         nameFPReg(src0), nameFPReg(dst));
    // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
    m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_VBLENDVPS,
                             mask, offset, base, src0, dst);
}
#ifdef JS_CODEGEN_X64
void twoByteOpSimd64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
@ -4733,6 +4755,21 @@ private:
immediate8(mask << 4);
}
// Emits a VEX-encoded vblendv instruction whose r/m operand is the memory
// location offset(base). The byte sequence is: the prefix and opcode written
// by threeOpVex, the memory ModRM for offset(base) with reg, and a trailing
// immediate byte that carries the mask register.
void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                  XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, int reg)
{
    // Translate the three-byte escape into the selector value handed to
    // threeOpVex; any other escape is a programming error.
    int m = 0;
    if (escape == 0x38)
        m = 2; // 0x0F 0x38
    else if (escape == 0x3A)
        m = 3; // 0x0F 0x3A
    else
        MOZ_CRASH("unexpected escape");

    // Register numbers above 7 contribute their bit 3 to the prefix.
    int r = (reg >> 3);
    int x = 0;
    int b = (base >> 3);
    int w = 0;
    int v = src0;
    int l = 0;

    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, reg);
    // The mask register number occupies the upper four bits of the immediate.
    immediate8(mask << 4);
}
#ifdef JS_CODEGEN_X64
// Quad-word-sized operands:
//

View File

@ -2704,12 +2704,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
masm.vcmpneqps(rhs, rhsCopy, mask);
// Emulates blendv
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
if (AssemblerX86Shared::HasAVX()) {
masm.vblendvps(mask, lhs, tmp, output);
} else {
// Emulate vblendvps.
// With SSE4.1 we could use blendvps, however it's awkward since
// it requires the mask to be in xmm0.
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
}
return;
}
case MSimdBinaryArith::MaxNum: {
@ -2732,12 +2738,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
masm.vcmpneqps(rhs, rhsCopy, mask);
// Emulates blendv
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
if (AssemblerX86Shared::HasAVX()) {
masm.vblendvps(mask, lhs, tmp, output);
} else {
// Emulate vblendvps.
// With SSE4.1 we could use blendvps, however it's awkward since
// it requires the mask to be in xmm0.
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
}
return;
}
}