mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-25 03:05:34 +00:00
Bug 1136226 - Unary functions for small integer SIMD types. r=bbouvier
- Implement 'not' and 'neg' for 8x16 and 16x8 types. - Rename some 'bitwiseFooX4' masm functions to 'bitwiseFooSimd128'. - Rename the zeroInt32x4 and zeroFloat32x4 to zeroSimd128{Int,Float}. - Add support for the paddb/paddw and psubb/psubw SSE2 instructions in the assembler.
This commit is contained in:
parent
43fd82ad3e
commit
62a36df31c
@ -4483,13 +4483,23 @@ LIRGenerator::visitSimdUnaryArith(MSimdUnaryArith* ins)
|
||||
// Cannot be at start, as the output is used as a temporary to store values.
|
||||
LUse in = use(ins->input());
|
||||
|
||||
if (ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Bool32x4) {
|
||||
LSimdUnaryArithIx4* lir = new(alloc()) LSimdUnaryArithIx4(in);
|
||||
define(lir, ins);
|
||||
} else if (ins->type() == MIRType::Float32x4) {
|
||||
LSimdUnaryArithFx4* lir = new(alloc()) LSimdUnaryArithFx4(in);
|
||||
define(lir, ins);
|
||||
} else {
|
||||
switch (ins->type()) {
|
||||
case MIRType::Int8x16:
|
||||
case MIRType::Bool8x16:
|
||||
define(new (alloc()) LSimdUnaryArithIx16(in), ins);
|
||||
break;
|
||||
case MIRType::Int16x8:
|
||||
case MIRType::Bool16x8:
|
||||
define(new (alloc()) LSimdUnaryArithIx8(in), ins);
|
||||
break;
|
||||
case MIRType::Int32x4:
|
||||
case MIRType::Bool32x4:
|
||||
define(new (alloc()) LSimdUnaryArithIx4(in), ins);
|
||||
break;
|
||||
case MIRType::Float32x4:
|
||||
define(new (alloc()) LSimdUnaryArithFx4(in), ins);
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unknown SIMD kind for unary operation");
|
||||
}
|
||||
}
|
||||
|
@ -525,6 +525,22 @@ class LSimdUnaryArith : public LInstructionHelper<1, 1, 0>
|
||||
}
|
||||
};
|
||||
|
||||
// Unary SIMD arithmetic operation on an Int8x16 operand.
// The lowering switch in this commit also creates this node for Bool8x16
// inputs. The class adds no members of its own: its constructor only forwards
// the single input allocation to the LSimdUnaryArith base class.
|
||||
class LSimdUnaryArithIx16 : public LSimdUnaryArith
|
||||
{
|
||||
public:
|
||||
LIR_HEADER(SimdUnaryArithIx16);
|
||||
explicit LSimdUnaryArithIx16(const LAllocation& in) : LSimdUnaryArith(in) {}
|
||||
};
|
||||
|
||||
// Unary SIMD arithmetic operation on an Int16x8 operand.
// The lowering switch in this commit also creates this node for Bool16x8
// inputs. The class adds no members of its own: its constructor only forwards
// the single input allocation to the LSimdUnaryArith base class.
|
||||
class LSimdUnaryArithIx8 : public LSimdUnaryArith
|
||||
{
|
||||
public:
|
||||
LIR_HEADER(SimdUnaryArithIx8);
|
||||
explicit LSimdUnaryArithIx8(const LAllocation& in) : LSimdUnaryArith(in) {}
|
||||
};
|
||||
|
||||
// Unary SIMD arithmetic operation on an Int32x4 operand
|
||||
class LSimdUnaryArithIx4 : public LSimdUnaryArith
|
||||
{
|
||||
|
@ -37,6 +37,8 @@
|
||||
_(SimdSwizzleI) \
|
||||
_(SimdSwizzleF) \
|
||||
_(SimdShuffle) \
|
||||
_(SimdUnaryArithIx16) \
|
||||
_(SimdUnaryArithIx8) \
|
||||
_(SimdUnaryArithIx4) \
|
||||
_(SimdUnaryArithFx4) \
|
||||
_(SimdBinaryCompIx4) \
|
||||
|
@ -55,9 +55,13 @@ ABIArgGenerator::next(MIRType type)
|
||||
case MIRType::Double:
|
||||
current_ = ABIArg(FloatArgRegs[regIndex_++]);
|
||||
break;
|
||||
case MIRType::Bool32x4:
|
||||
case MIRType::Int8x16:
|
||||
case MIRType::Int16x8:
|
||||
case MIRType::Int32x4:
|
||||
case MIRType::Float32x4:
|
||||
case MIRType::Bool8x16:
|
||||
case MIRType::Bool16x8:
|
||||
case MIRType::Bool32x4:
|
||||
// On Win64, >64 bit args need to be passed by reference, but asm.js
|
||||
// doesn't allow passing SIMD values to FFIs. The only way to reach
|
||||
// here is asm to asm calls, so we can break the ABI here.
|
||||
@ -91,9 +95,13 @@ ABIArgGenerator::next(MIRType type)
|
||||
else
|
||||
current_ = ABIArg(FloatArgRegs[floatRegIndex_++]);
|
||||
break;
|
||||
case MIRType::Bool32x4:
|
||||
case MIRType::Int8x16:
|
||||
case MIRType::Int16x8:
|
||||
case MIRType::Int32x4:
|
||||
case MIRType::Float32x4:
|
||||
case MIRType::Bool8x16:
|
||||
case MIRType::Bool16x8:
|
||||
case MIRType::Bool32x4:
|
||||
if (floatRegIndex_ == NumFloatArgRegs) {
|
||||
stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment);
|
||||
current_ = ABIArg(stackOffset_);
|
||||
|
@ -2439,6 +2439,70 @@ class AssemblerX86Shared : public AssemblerShared
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emits the SSE2 paddb/vpaddb instruction (packed add on byte lanes).
// src1 may be an XMM register, a base+displacement memory operand or an
// absolute address; the matching masm.vpaddb_* encoder is selected from
// src1.kind(). Any other operand kind crashes via MOZ_CRASH.
// src0 and dest are passed through as register encodings.
void vpaddb(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.vpaddb_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpaddb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.vpaddb_mr(src1.address(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emits the SSE2 psubb/vpsubb instruction (packed subtract on byte lanes).
// src1 may be an XMM register, a base+displacement memory operand or an
// absolute address; the matching masm.vpsubb_* encoder is selected from
// src1.kind(). Any other operand kind crashes via MOZ_CRASH.
// NOTE(review): the Int8x16 negation code zeroes dest and then subtracts the
// input through this function, so the result is presumably src0 - src1.
void vpsubb(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.vpsubb_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpsubb_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.vpsubb_mr(src1.address(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emits the SSE2 paddw/vpaddw instruction (packed add on 16-bit word lanes).
// src1 may be an XMM register, a base+displacement memory operand or an
// absolute address; the matching masm.vpaddw_* encoder is selected from
// src1.kind(). Any other operand kind crashes via MOZ_CRASH.
// src0 and dest are passed through as register encodings.
void vpaddw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.vpaddw_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpaddw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.vpaddw_mr(src1.address(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emits the SSE2 psubw/vpsubw instruction (packed subtract on 16-bit word
// lanes). src1 may be an XMM register, a base+displacement memory operand or
// an absolute address; the matching masm.vpsubw_* encoder is selected from
// src1.kind(). Any other operand kind crashes via MOZ_CRASH.
// NOTE(review): the Int16x8 negation code zeroes dest and then subtracts the
// input through this function, so the result is presumably src0 - src1.
void vpsubw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.vpsubw_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpsubw_mr(src1.disp(), src1.base(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.vpsubw_mr(src1.address(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void vpaddd(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
|
@ -583,6 +583,32 @@ public:
|
||||
m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest);
|
||||
}
|
||||
|
||||
// Encodes vpaddb with an XMM register as src1. Delegates to twoByteOpSimd
// with the VEX_PD prefix selector and opcode OP2_PADDB_VdqWdq (0xFC).
void vpaddb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
// Encodes vpaddb with a memory source addressed as base register plus
// offset. Delegates to twoByteOpSimd with VEX_PD and OP2_PADDB_VdqWdq (0xFC).
void vpaddb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, offset, base, src0, dst);
|
||||
}
|
||||
// Encodes vpaddb with a memory source given as an absolute address.
// Delegates to twoByteOpSimd with VEX_PD and OP2_PADDB_VdqWdq (0xFC).
void vpaddb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
// Encodes vpaddw with an XMM register as src1. Delegates to twoByteOpSimd
// with the VEX_PD prefix selector and opcode OP2_PADDW_VdqWdq (0xFD).
void vpaddw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
// Encodes vpaddw with a memory source addressed as base register plus
// offset. Delegates to twoByteOpSimd with VEX_PD and OP2_PADDW_VdqWdq (0xFD).
void vpaddw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, offset, base, src0, dst);
|
||||
}
|
||||
// Encodes vpaddw with a memory source given as an absolute address.
// Delegates to twoByteOpSimd with VEX_PD and OP2_PADDW_VdqWdq (0xFD).
void vpaddw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst);
|
||||
@ -596,6 +622,32 @@ public:
|
||||
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
// Encodes vpsubb with an XMM register as src1. Delegates to twoByteOpSimd
// with the VEX_PD prefix selector and opcode OP2_PSUBB_VdqWdq (0xF8).
void vpsubb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
// Encodes vpsubb with a memory source addressed as base register plus
// offset. Delegates to twoByteOpSimd with VEX_PD and OP2_PSUBB_VdqWdq (0xF8).
void vpsubb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, offset, base, src0, dst);
|
||||
}
|
||||
// Encodes vpsubb with a memory source given as an absolute address.
// Delegates to twoByteOpSimd with VEX_PD and OP2_PSUBB_VdqWdq (0xF8).
void vpsubb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
// Encodes vpsubw with an XMM register as src1. Delegates to twoByteOpSimd
// with the VEX_PD prefix selector and opcode OP2_PSUBW_VdqWdq (0xF9).
void vpsubw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
// Encodes vpsubw with a memory source addressed as base register plus
// offset. Delegates to twoByteOpSimd with VEX_PD and OP2_PSUBW_VdqWdq (0xF9).
void vpsubw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, offset, base, src0, dst);
|
||||
}
|
||||
// Encodes vpsubw with a memory source given as an absolute address.
// Delegates to twoByteOpSimd with VEX_PD and OP2_PSUBW_VdqWdq (0xF9).
void vpsubw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst);
|
||||
|
@ -2533,14 +2533,14 @@ CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
|
||||
// We can identify A-lanes by the sign bits in A: Any A-lanes will be
|
||||
// positive in A, and N, B, and V-lanes will be 0x80000000 in A. Compute a
|
||||
// mask of non-A-lanes into |tempF|.
|
||||
masm.zeroFloat32x4(tempF);
|
||||
masm.zeroSimd128Float(tempF);
|
||||
masm.packedGreaterThanInt32x4(Operand(out), tempF);
|
||||
|
||||
// Clear the A-lanes in B.
|
||||
masm.bitwiseAndX4(Operand(tempF), scratch);
|
||||
masm.bitwiseAndSimd128(Operand(tempF), scratch);
|
||||
|
||||
// Compute the final result: A for A-lanes, A|B for B-lanes.
|
||||
masm.bitwiseOrX4(Operand(scratch), out);
|
||||
masm.bitwiseOrSimd128(Operand(scratch), out);
|
||||
|
||||
// We still need to filter out the V-lanes. They would show up as 0x80000000
|
||||
// in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
|
||||
@ -3315,7 +3315,7 @@ CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
|
||||
// if that's what it's used in.
|
||||
masm.loadConstantSimd128Int(allOnes, scratch);
|
||||
masm.packedEqualInt32x4(rhs, lhs);
|
||||
masm.bitwiseXorX4(Operand(scratch), lhs);
|
||||
masm.bitwiseXorSimd128(Operand(scratch), lhs);
|
||||
return;
|
||||
case MSimdBinaryComp::greaterThanOrEqual:
|
||||
// src := rhs
|
||||
@ -3325,13 +3325,13 @@ CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
|
||||
masm.loadAlignedSimd128Int(rhs, scratch);
|
||||
masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
|
||||
masm.loadConstantSimd128Int(allOnes, lhs);
|
||||
masm.bitwiseXorX4(Operand(scratch), lhs);
|
||||
masm.bitwiseXorSimd128(Operand(scratch), lhs);
|
||||
return;
|
||||
case MSimdBinaryComp::lessThanOrEqual:
|
||||
// lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
|
||||
masm.loadConstantSimd128Int(allOnes, scratch);
|
||||
masm.packedGreaterThanInt32x4(rhs, lhs);
|
||||
masm.bitwiseXorX4(Operand(scratch), lhs);
|
||||
masm.bitwiseXorSimd128(Operand(scratch), lhs);
|
||||
return;
|
||||
}
|
||||
MOZ_CRASH("unexpected SIMD op");
|
||||
@ -3534,6 +3534,58 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins)
|
||||
MOZ_CRASH("unexpected SIMD op");
|
||||
}
|
||||
|
||||
// Generates code for a unary SIMD operation on an LSimdUnaryArithIx16 node
// (8-bit lanes). Only neg and not_ are implemented; abs,
// reciprocalApproximation, reciprocalSqrtApproximation and sqrt leave the
// switch and reach the MOZ_CRASH at the end of the function.
void
|
||||
CodeGeneratorX86Shared::visitSimdUnaryArithIx16(LSimdUnaryArithIx16* ins)
|
||||
{
|
||||
Operand in = ToOperand(ins->input());
|
||||
FloatRegister out = ToFloatRegister(ins->output());
|
||||
|
||||
// Constant with -1 splatted into every lane; XOR-ing with it implements not_.
static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
|
||||
|
||||
switch (ins->operation()) {
|
||||
case MSimdUnaryArith::neg:
|
||||
// Negate by zeroing the output and then subtracting the input from it.
masm.zeroSimd128Int(out);
|
||||
masm.packedSubInt8(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::not_:
|
||||
// Load the all-ones constant into the output, then XOR the input into it.
masm.loadConstantSimd128Int(allOnes, out);
|
||||
masm.bitwiseXorSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::abs:
|
||||
case MSimdUnaryArith::reciprocalApproximation:
|
||||
case MSimdUnaryArith::reciprocalSqrtApproximation:
|
||||
case MSimdUnaryArith::sqrt:
|
||||
break;
|
||||
}
|
||||
MOZ_CRASH("unexpected SIMD op");
|
||||
}
|
||||
|
||||
// Generates code for a unary SIMD operation on an LSimdUnaryArithIx8 node
// (16-bit lanes). Only neg and not_ are implemented; abs,
// reciprocalApproximation, reciprocalSqrtApproximation and sqrt leave the
// switch and reach the MOZ_CRASH at the end of the function.
void
|
||||
CodeGeneratorX86Shared::visitSimdUnaryArithIx8(LSimdUnaryArithIx8* ins)
|
||||
{
|
||||
Operand in = ToOperand(ins->input());
|
||||
FloatRegister out = ToFloatRegister(ins->output());
|
||||
|
||||
// Constant with -1 splatted into every lane; XOR-ing with it implements not_.
static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
|
||||
|
||||
switch (ins->operation()) {
|
||||
case MSimdUnaryArith::neg:
|
||||
// Negate by zeroing the output and then subtracting the input from it.
masm.zeroSimd128Int(out);
|
||||
masm.packedSubInt16(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::not_:
|
||||
// Load the all-ones constant into the output, then XOR the input into it.
masm.loadConstantSimd128Int(allOnes, out);
|
||||
masm.bitwiseXorSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::abs:
|
||||
case MSimdUnaryArith::reciprocalApproximation:
|
||||
case MSimdUnaryArith::reciprocalSqrtApproximation:
|
||||
case MSimdUnaryArith::sqrt:
|
||||
break;
|
||||
}
|
||||
MOZ_CRASH("unexpected SIMD op");
|
||||
}
|
||||
|
||||
void
|
||||
CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins)
|
||||
{
|
||||
@ -3544,12 +3596,12 @@ CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins)
|
||||
|
||||
switch (ins->operation()) {
|
||||
case MSimdUnaryArith::neg:
|
||||
masm.zeroInt32x4(out);
|
||||
masm.zeroSimd128Int(out);
|
||||
masm.packedSubInt32(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::not_:
|
||||
masm.loadConstantSimd128Int(allOnes, out);
|
||||
masm.bitwiseXorX4(in, out);
|
||||
masm.bitwiseXorSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::abs:
|
||||
case MSimdUnaryArith::reciprocalApproximation:
|
||||
@ -3580,15 +3632,15 @@ CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins)
|
||||
switch (ins->operation()) {
|
||||
case MSimdUnaryArith::abs:
|
||||
masm.loadConstantSimd128Float(signMasks, out);
|
||||
masm.bitwiseAndX4(in, out);
|
||||
masm.bitwiseAndSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::neg:
|
||||
masm.loadConstantSimd128Float(minusZero, out);
|
||||
masm.bitwiseXorX4(in, out);
|
||||
masm.bitwiseXorSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::not_:
|
||||
masm.loadConstantSimd128Float(allOnes, out);
|
||||
masm.bitwiseXorX4(in, out);
|
||||
masm.bitwiseXorSimd128(in, out);
|
||||
return;
|
||||
case MSimdUnaryArith::reciprocalApproximation:
|
||||
masm.packedRcpApproximationFloat32x4(in, out);
|
||||
@ -3709,9 +3761,9 @@ CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins)
|
||||
if (!mir->mask()->isSimdBinaryComp())
|
||||
masm.packedRightShiftByScalar(Imm32(31), temp);
|
||||
|
||||
masm.bitwiseAndX4(Operand(temp), output);
|
||||
masm.bitwiseAndNotX4(Operand(onFalse), temp);
|
||||
masm.bitwiseOrX4(Operand(temp), output);
|
||||
masm.bitwiseAndSimd128(Operand(temp), output);
|
||||
masm.bitwiseAndNotSimd128(Operand(onFalse), temp);
|
||||
masm.bitwiseOrSimd128(Operand(temp), output);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -307,6 +307,8 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
|
||||
void visitSimdSwizzleI(LSimdSwizzleI* lir);
|
||||
void visitSimdSwizzleF(LSimdSwizzleF* lir);
|
||||
void visitSimdShuffle(LSimdShuffle* lir);
|
||||
void visitSimdUnaryArithIx16(LSimdUnaryArithIx16* lir);
|
||||
void visitSimdUnaryArithIx8(LSimdUnaryArithIx8* lir);
|
||||
void visitSimdUnaryArithIx4(LSimdUnaryArithIx4* lir);
|
||||
void visitSimdUnaryArithFx4(LSimdUnaryArithFx4* lir);
|
||||
void visitSimdBinaryCompIx4(LSimdBinaryCompIx4* lir);
|
||||
|
@ -259,7 +259,11 @@ enum TwoByteOpcodeID {
|
||||
OP2_PXORDQ_VdqWdq = 0xEF,
|
||||
OP2_PSLLD_VdqWdq = 0xF2,
|
||||
OP2_PMULUDQ_VdqWdq = 0xF4,
|
||||
OP2_PSUBB_VdqWdq = 0xF8,
|
||||
OP2_PSUBW_VdqWdq = 0xF9,
|
||||
OP2_PSUBD_VdqWdq = 0xFA,
|
||||
OP2_PADDB_VdqWdq = 0xFC,
|
||||
OP2_PADDW_VdqWdq = 0xFD,
|
||||
OP2_PADDD_VdqWdq = 0xFE
|
||||
};
|
||||
|
||||
|
@ -928,9 +928,9 @@ MacroAssembler::canonicalizeFloat32x4(FloatRegister reg, FloatRegister scratch)
|
||||
float nanf = float(JS::GenericNaN());
|
||||
loadConstantSimd128Float(SimdConstant::SplatX4(nanf), ifFalse);
|
||||
|
||||
bitwiseAndX4(Operand(mask), reg);
|
||||
bitwiseAndNotX4(Operand(ifFalse), mask);
|
||||
bitwiseOrX4(Operand(mask), reg);
|
||||
bitwiseAndSimd128(Operand(mask), reg);
|
||||
bitwiseAndNotSimd128(Operand(ifFalse), mask);
|
||||
bitwiseOrSimd128(Operand(mask), reg);
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
|
@ -804,24 +804,24 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
vcvtdq2ps(src, dest);
|
||||
}
|
||||
|
||||
void bitwiseAndX4(const Operand& src, FloatRegister dest) {
|
||||
void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
|
||||
// TODO Using the "ps" variant for all types incurs a domain crossing
|
||||
// penalty for integer types and double.
|
||||
vandps(src, dest, dest);
|
||||
}
|
||||
void bitwiseAndNotX4(const Operand& src, FloatRegister dest) {
|
||||
void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
|
||||
vandnps(src, dest, dest);
|
||||
}
|
||||
void bitwiseOrX4(const Operand& src, FloatRegister dest) {
|
||||
void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
|
||||
vorps(src, dest, dest);
|
||||
}
|
||||
void bitwiseXorX4(const Operand& src, FloatRegister dest) {
|
||||
void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
|
||||
vxorps(src, dest, dest);
|
||||
}
|
||||
void zeroFloat32x4(FloatRegister dest) {
|
||||
void zeroSimd128Float(FloatRegister dest) {
|
||||
vxorps(dest, dest, dest);
|
||||
}
|
||||
void zeroInt32x4(FloatRegister dest) {
|
||||
void zeroSimd128Int(FloatRegister dest) {
|
||||
vpxor(dest, dest, dest);
|
||||
}
|
||||
|
||||
@ -939,6 +939,18 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
|
||||
vpcmpgtd(src, dest, dest);
|
||||
}
|
||||
// Packed add on 8-bit lanes: emits vpaddb with dest used both as the second
// source and as the destination, so dest is updated in place with src.
void packedAddInt8(const Operand& src, FloatRegister dest) {
|
||||
vpaddb(src, dest, dest);
|
||||
}
|
||||
// Packed subtract on 8-bit lanes: emits vpsubb with dest used both as the
// second source and as the destination, so dest is updated in place.
// NOTE(review): the Int8x16 negation code zeroes dest before calling this,
// so the operation is presumably dest = dest - src.
void packedSubInt8(const Operand& src, FloatRegister dest) {
|
||||
vpsubb(src, dest, dest);
|
||||
}
|
||||
// Packed add on 16-bit lanes: emits vpaddw with dest used both as the second
// source and as the destination, so dest is updated in place with src.
void packedAddInt16(const Operand& src, FloatRegister dest) {
|
||||
vpaddw(src, dest, dest);
|
||||
}
|
||||
// Packed subtract on 16-bit lanes: emits vpsubw with dest used both as the
// second source and as the destination, so dest is updated in place.
// NOTE(review): the Int16x8 negation code zeroes dest before calling this,
// so the operation is presumably dest = dest - src.
void packedSubInt16(const Operand& src, FloatRegister dest) {
|
||||
vpsubw(src, dest, dest);
|
||||
}
|
||||
void packedAddInt32(const Operand& src, FloatRegister dest) {
|
||||
vpaddd(src, dest, dest);
|
||||
}
|
||||
@ -1197,7 +1209,7 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
static const SimdConstant zero = SimdConstant::SplatX4(0);
|
||||
static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
|
||||
if (v == zero) {
|
||||
zeroInt32x4(dest);
|
||||
zeroSimd128Int(dest);
|
||||
return true;
|
||||
}
|
||||
if (v == minusOne) {
|
||||
@ -1211,7 +1223,7 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
if (v == zero) {
|
||||
// This won't get inlined if the SimdConstant v contains -0 in any
|
||||
// lane, as operator== here does a memcmp.
|
||||
zeroFloat32x4(dest);
|
||||
zeroSimd128Float(dest);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -30,8 +30,12 @@ ABIArgGenerator::next(MIRType type)
|
||||
current_ = ABIArg(stackOffset_);
|
||||
stackOffset_ += sizeof(uint64_t);
|
||||
break;
|
||||
case MIRType::Int8x16:
|
||||
case MIRType::Int16x8:
|
||||
case MIRType::Int32x4:
|
||||
case MIRType::Float32x4:
|
||||
case MIRType::Bool8x16:
|
||||
case MIRType::Bool16x8:
|
||||
case MIRType::Bool32x4:
|
||||
// SIMD values aren't passed in or out of C++, so we can make up
|
||||
// whatever internal ABI we like. visitAsmJSPassArg assumes
|
||||
|
Loading…
Reference in New Issue
Block a user