mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-07 12:15:51 +00:00
Bug 1065339 - IonMonkey: Use vblendvps for SIMD minNum/maxNum r=jandem
This commit is contained in:
parent
042838584e
commit
1dffcd018f
@ -2234,6 +2234,23 @@ class AssemblerX86Shared : public AssemblerShared
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emits a vblendvps instruction with every operand held in a float
// register. Asserts that SSE4.1 is available, then hands the register
// codes to the base assembler's register-register emitter.
void vblendvps(FloatRegister mask, FloatRegister src1, FloatRegister src0, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE41());
    masm.vblendvps_rr(mask.code(),
                      src1.code(),
                      src0.code(),
                      dest.code());
}
|
||||
// Emits a vblendvps instruction whose src1 operand is either a float
// register or a base+displacement memory location. Any other operand
// kind is a fatal error. Asserts that SSE4.1 is available.
void vblendvps(FloatRegister mask, const Operand &src1, FloatRegister src0, FloatRegister dest)
{
    MOZ_ASSERT(HasSSE41());
    switch (src1.kind()) {
      case Operand::MEM_REG_DISP:
        // Memory form: src1 is addressed through its base and displacement.
        masm.vblendvps_mr(mask.code(), src1.disp(), src1.base(), src0.code(), dest.code());
        return;
      case Operand::FPREG:
        // Register form: src1 is an xmm register.
        masm.vblendvps_rr(mask.code(), src1.fpu(), src0.code(), dest.code());
        return;
      default:
        break;
    }
    MOZ_CRASH("unexpected operand kind");
}
|
||||
void movsldup(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE3());
|
||||
masm.movsldup_rr(src.code(), dest.code());
|
||||
|
@ -3788,6 +3788,9 @@ public:
|
||||
// Register-register form of vblendvps: delegates straight to the
// register overload of vblendvOpSimd.
void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
    vblendvOpSimd(mask, src1, src0, dst);
}
|
||||
// Memory-register form of vblendvps: the second source is the memory
// operand offset(base). Delegates to the memory overload of vblendvOpSimd.
void vblendvps_mr(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
    vblendvOpSimd(mask, offset, base, src0, dst);
}
|
||||
|
||||
void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
@ -4397,6 +4400,25 @@ private:
|
||||
mask, (RegisterID)rm, src0, dst);
|
||||
}
|
||||
|
||||
// Emits blendvps/vblendvps with the memory operand offset(base) as the
// r/m source. If useLegacySSEEncodingForVblendv() accepts the operands,
// the legacy (non-VEX) blendvps encoding is emitted; otherwise the VEX
// encoded vblendvps is emitted.
//
// Note: although these are "ps" instructions, both encodings use the
// "pd" prefix.
void vblendvOpSimd(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
    if (!useLegacySSEEncodingForVblendv(mask, src0, dst)) {
        // VEX path: mask, sources and destination are all explicit.
        spew("vblendvps %s, %s0x%x(%s), %s, %s",
             nameFPReg(mask), PRETTY_PRINT_OFFSET(offset), nameIReg(base),
             nameFPReg(src0), nameFPReg(dst));
        m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_VBLENDVPS,
                                 mask, offset, base, src0, dst);
        return;
    }

    // Legacy SSE path: prefix byte followed by the three-byte opcode.
    spew("blendvps %s0x%x(%s), %s",
         PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(src0));
    m_formatter.legacySSEPrefix(VEX_PD);
    m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_BLENDVPS, offset, base, src0);
}
|
||||
|
||||
#ifdef JS_CODEGEN_X64
|
||||
void twoByteOpSimd64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
|
||||
@ -4733,6 +4755,21 @@ private:
|
||||
immediate8(mask << 4);
|
||||
}
|
||||
|
||||
// VEX-encodes a blendv-style instruction whose r/m operand is the memory
// location offset(base). Emission order: the VEX prefix and opcode via
// threeOpVex(), the memory ModRM for (offset, base, reg), then an
// immediate byte carrying the mask register number shifted left by four.
void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                  XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, int reg)
{
    // Translate the three-byte escape into the value passed as `m`.
    int m = 0;
    if (escape == 0x38)
        m = 2; // 0x0F 0x38
    else if (escape == 0x3A)
        m = 3; // 0x0F 0x3A
    else
        MOZ_CRASH("unexpected escape");

    // r and b take bit 3 and up of the reg and base numbers; x, w and l
    // are fixed at zero and v is the src0 register number.
    int r = reg >> 3;
    int x = 0;
    int b = base >> 3;
    int w = 0;
    int v = src0;
    int l = 0;

    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, reg);
    immediate8(mask << 4);
}
|
||||
|
||||
#ifdef JS_CODEGEN_X64
|
||||
// Quad-word-sized operands:
|
||||
//
|
||||
|
@ -2704,12 +2704,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
|
||||
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
|
||||
masm.vcmpneqps(rhs, rhsCopy, mask);
|
||||
|
||||
// Emulates blendv
|
||||
if (lhs != output)
|
||||
masm.movaps(lhs, output);
|
||||
masm.andps(Operand(mask), output);
|
||||
masm.andnps(Operand(tmp), mask);
|
||||
masm.orps(Operand(mask), output);
|
||||
if (AssemblerX86Shared::HasAVX()) {
|
||||
masm.vblendvps(mask, lhs, tmp, output);
|
||||
} else {
|
||||
// Emulate vblendvps.
|
||||
// With SSE4.1 we could use blendvps, however it's awkward since
|
||||
// it requires the mask to be in xmm0.
|
||||
if (lhs != output)
|
||||
masm.movaps(lhs, output);
|
||||
masm.andps(Operand(mask), output);
|
||||
masm.andnps(Operand(tmp), mask);
|
||||
masm.orps(Operand(mask), output);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case MSimdBinaryArith::MaxNum: {
|
||||
@ -2732,12 +2738,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
|
||||
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
|
||||
masm.vcmpneqps(rhs, rhsCopy, mask);
|
||||
|
||||
// Emulates blendv
|
||||
if (lhs != output)
|
||||
masm.movaps(lhs, output);
|
||||
masm.andps(Operand(mask), output);
|
||||
masm.andnps(Operand(tmp), mask);
|
||||
masm.orps(Operand(mask), output);
|
||||
if (AssemblerX86Shared::HasAVX()) {
|
||||
masm.vblendvps(mask, lhs, tmp, output);
|
||||
} else {
|
||||
// Emulate vblendvps.
|
||||
// With SSE4.1 we could use blendvps, however it's awkward since
|
||||
// it requires the mask to be in xmm0.
|
||||
if (lhs != output)
|
||||
masm.movaps(lhs, output);
|
||||
masm.andps(Operand(mask), output);
|
||||
masm.andnps(Operand(tmp), mask);
|
||||
masm.orps(Operand(mask), output);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user