Bug 1136226 - Materialize 8x16 and 16x8 SIMD constants. r=sunfish

Rename LIR instructions:

  LInt32x4   -> LSimd128Int
  LFloat32x4 -> LSimd128Float

These two LIR instructions can be used to materialize 128-bit SIMD vectors of
other geometries too. Also rename the masm.loadConstant{Int,Float}32x4()
functions to indicate that they can be used for other geometries.
This commit is contained in:
Jakob Olesen 2016-05-09 16:48:30 -07:00
parent b6fedab1e8
commit 6bae150dea
13 changed files with 58 additions and 61 deletions

View File

@ -4309,12 +4309,16 @@ LIRGenerator::visitSimdConstant(MSimdConstant* ins)
MOZ_ASSERT(IsSimdType(ins->type()));
switch (ins->type()) {
case MIRType::Bool32x4:
case MIRType::Int8x16:
case MIRType::Int16x8:
case MIRType::Int32x4:
define(new(alloc()) LInt32x4(), ins);
case MIRType::Bool8x16:
case MIRType::Bool16x8:
case MIRType::Bool32x4:
define(new(alloc()) LSimd128Int(), ins);
break;
case MIRType::Float32x4:
define(new(alloc()) LFloat32x4(), ins);
define(new(alloc()) LSimd128Float(), ins);
break;
default:
MOZ_CRASH("Unknown SIMD kind when generating constant");

View File

@ -246,8 +246,8 @@ class CodeGeneratorARM : public CodeGeneratorShared
public:
// Unimplemented SIMD instructions
void visitSimdSplatX4(LSimdSplatX4* lir) { MOZ_CRASH("NYI"); }
void visitInt32x4(LInt32x4* ins) { MOZ_CRASH("NYI"); }
void visitFloat32x4(LFloat32x4* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Int(LSimd128Int* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Float(LSimd128Float* ins) { MOZ_CRASH("NYI"); }
void visitSimdReinterpretCast(LSimdReinterpretCast* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementI(LSimdExtractElementI* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementF(LSimdExtractElementF* ins) { MOZ_CRASH("NYI"); }

View File

@ -225,8 +225,8 @@ class CodeGeneratorARM64 : public CodeGeneratorShared
public:
// Unimplemented SIMD instructions.
void visitSimdSplatX4(LSimdSplatX4* lir) { MOZ_CRASH("NYI"); }
void visitInt32x4(LInt32x4* ins) { MOZ_CRASH("NYI"); }
void visitFloat32x4(LFloat32x4* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Int(LSimd128Int* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Float(LSimd128Float* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementI(LSimdExtractElementI* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementF(LSimdExtractElementF* ins) { MOZ_CRASH("NYI"); }
void visitSimdBinaryCompIx4(LSimdBinaryCompIx4* lir) { MOZ_CRASH("NYI"); }

View File

@ -230,8 +230,8 @@ class CodeGeneratorMIPSShared : public CodeGeneratorShared
public:
// Unimplemented SIMD instructions
void visitSimdSplatX4(LSimdSplatX4* lir) { MOZ_CRASH("NYI"); }
void visitInt32x4(LInt32x4* ins) { MOZ_CRASH("NYI"); }
void visitFloat32x4(LFloat32x4* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Int(LSimd128Int* ins) { MOZ_CRASH("NYI"); }
void visitSimd128Float(LSimd128Float* ins) { MOZ_CRASH("NYI"); }
void visitSimdReinterpretCast(LSimdReinterpretCast* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementI(LSimdExtractElementI* ins) { MOZ_CRASH("NYI"); }
void visitSimdExtractElementF(LSimdExtractElementF* ins) { MOZ_CRASH("NYI"); }

View File

@ -756,23 +756,24 @@ class LFloat32 : public LInstructionHelper<1, 0, 0>
}
};
// Constant SIMD int32x4. Also used for bool32x4.
class LInt32x4 : public LInstructionHelper<1, 0, 0>
// Constant 128-bit SIMD integer vector (8x16, 16x8, 32x4).
// Also used for Bool32x4, Bool16x8, etc.
class LSimd128Int : public LInstructionHelper<1, 0, 0>
{
public:
LIR_HEADER(Int32x4);
LIR_HEADER(Simd128Int);
explicit LInt32x4() {}
explicit LSimd128Int() {}
const SimdConstant& getValue() const { return mir_->toSimdConstant()->value(); }
};
// Constant SIMD float32x4.
class LFloat32x4 : public LInstructionHelper<1, 0, 0>
// Constant 128-bit SIMD floating point vector (32x4, 64x2).
class LSimd128Float : public LInstructionHelper<1, 0, 0>
{
public:
LIR_HEADER(Float32x4);
LIR_HEADER(Simd128Float);
explicit LFloat32x4() {}
explicit LSimd128Float() {}
const SimdConstant& getValue() const { return mir_->toSimdConstant()->value(); }
};

View File

@ -21,8 +21,8 @@
_(SimdBox) \
_(SimdUnbox) \
_(SimdSplatX4) \
_(Int32x4) \
_(Float32x4) \
_(Simd128Int) \
_(Simd128Float) \
_(SimdAllTrue) \
_(SimdAnyTrue) \
_(SimdReinterpretCast) \

View File

@ -49,29 +49,25 @@ MacroAssemblerX64::loadConstantFloat32(float f, FloatRegister dest)
}
void
MacroAssemblerX64::loadConstantInt32x4(const SimdConstant& v, FloatRegister dest)
MacroAssemblerX64::loadConstantSimd128Int(const SimdConstant& v, FloatRegister dest)
{
MOZ_ASSERT(v.type() == SimdConstant::Int32x4);
if (maybeInlineInt32x4(v, dest))
if (maybeInlineSimd128Int(v, dest))
return;
SimdData* val = getSimdData(v);
if (!val)
return;
MOZ_ASSERT(val->type() == SimdConstant::Int32x4);
JmpSrc j = masm.vmovdqa_ripr(dest.encoding());
propagateOOM(val->uses.append(CodeOffset(j.offset())));
}
void
MacroAssemblerX64::loadConstantFloat32x4(const SimdConstant&v, FloatRegister dest)
MacroAssemblerX64::loadConstantSimd128Float(const SimdConstant&v, FloatRegister dest)
{
MOZ_ASSERT(v.type() == SimdConstant::Float32x4);
if (maybeInlineFloat32x4(v, dest))
if (maybeInlineSimd128Float(v, dest))
return;
SimdData* val = getSimdData(v);
if (!val)
return;
MOZ_ASSERT(val->type() == SimdConstant::Float32x4);
JmpSrc j = masm.vmovaps_ripr(dest.encoding());
propagateOOM(val->uses.append(CodeOffset(j.offset())));
}

View File

@ -861,8 +861,8 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
void loadConstantDouble(double d, FloatRegister dest);
void loadConstantFloat32(float f, FloatRegister dest);
void loadConstantInt32x4(const SimdConstant& v, FloatRegister dest);
void loadConstantFloat32x4(const SimdConstant& v, FloatRegister dest);
void loadConstantSimd128Int(const SimdConstant& v, FloatRegister dest);
void loadConstantSimd128Float(const SimdConstant& v, FloatRegister dest);
Condition testInt32Truthy(bool truthy, const ValueOperand& operand) {
test32(operand.valueReg(), operand.valueReg());

View File

@ -2424,17 +2424,17 @@ CodeGeneratorX86Shared::visitNegF(LNegF* ins)
}
void
CodeGeneratorX86Shared::visitInt32x4(LInt32x4* ins)
CodeGeneratorX86Shared::visitSimd128Int(LSimd128Int* ins)
{
const LDefinition* out = ins->getDef(0);
masm.loadConstantInt32x4(ins->getValue(), ToFloatRegister(out));
masm.loadConstantSimd128Int(ins->getValue(), ToFloatRegister(out));
}
void
CodeGeneratorX86Shared::visitFloat32x4(LFloat32x4* ins)
CodeGeneratorX86Shared::visitSimd128Float(LSimd128Float* ins)
{
const LDefinition* out = ins->getDef(0);
masm.loadConstantFloat32x4(ins->getValue(), ToFloatRegister(out));
masm.loadConstantSimd128Float(ins->getValue(), ToFloatRegister(out));
}
void
@ -2460,7 +2460,7 @@ CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins)
static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648));
ScratchSimd128Scope scratch(masm);
masm.loadConstantInt32x4(InvalidResult, scratch);
masm.loadConstantSimd128Int(InvalidResult, scratch);
masm.packedEqualInt32x4(Operand(out), scratch);
// TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of
// the two following instructions.
@ -2483,13 +2483,13 @@ CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIn
Register temp = ool->temp();
ScratchSimd128Scope scratch(masm);
masm.loadConstantFloat32x4(Int32MinX4, scratch);
masm.loadConstantSimd128Float(Int32MinX4, scratch);
masm.vcmpleps(Operand(input), scratch, scratch);
masm.vmovmskps(scratch, temp);
masm.cmp32(temp, Imm32(15));
masm.j(Assembler::NotEqual, &onConversionError);
masm.loadConstantFloat32x4(Int32MaxX4, scratch);
masm.loadConstantSimd128Float(Int32MaxX4, scratch);
masm.vcmpleps(Operand(input), scratch, scratch);
masm.vmovmskps(scratch, temp);
masm.cmp32(temp, Imm32(0));
@ -2550,7 +2550,7 @@ CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
// Compute B in |scratch|.
static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
masm.loadConstantFloat32x4(Bias, scratch);
masm.loadConstantSimd128Float(Bias, scratch);
masm.packedAddFloat32(Operand(in), scratch);
masm.convertFloat32x4ToInt32x4(scratch, scratch);
@ -3234,7 +3234,7 @@ CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
// Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
// should invert the comparison by, e.g. swapping the arms of a select
// if that's what it's used in.
masm.loadConstantInt32x4(allOnes, scratch);
masm.loadConstantSimd128Int(allOnes, scratch);
masm.packedEqualInt32x4(rhs, lhs);
masm.bitwiseXorX4(Operand(scratch), lhs);
return;
@ -3245,12 +3245,12 @@ CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
else
masm.loadAlignedInt32x4(rhs, scratch);
masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
masm.loadConstantInt32x4(allOnes, lhs);
masm.loadConstantSimd128Int(allOnes, lhs);
masm.bitwiseXorX4(Operand(scratch), lhs);
return;
case MSimdBinaryComp::lessThanOrEqual:
// lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
masm.loadConstantInt32x4(allOnes, scratch);
masm.loadConstantSimd128Int(allOnes, scratch);
masm.packedGreaterThanInt32x4(rhs, lhs);
masm.bitwiseXorX4(Operand(scratch), lhs);
return;
@ -3389,7 +3389,7 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins)
}
case MSimdBinaryArith::Op_minNum: {
FloatRegister tmp = ToFloatRegister(ins->temp());
masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
FloatRegister mask = scratch;
FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch);
@ -3419,11 +3419,11 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins)
}
case MSimdBinaryArith::Op_maxNum: {
FloatRegister mask = scratch;
masm.loadConstantInt32x4(SimdConstant::SplatX4(0), mask);
masm.loadConstantSimd128Int(SimdConstant::SplatX4(0), mask);
masm.vpcmpeqd(Operand(lhs), mask, mask);
FloatRegister tmp = ToFloatRegister(ins->temp());
masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
masm.vandps(tmp, mask, mask);
FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
@ -3469,7 +3469,7 @@ CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins)
masm.packedSubInt32(in, out);
return;
case MSimdUnaryArith::not_:
masm.loadConstantInt32x4(allOnes, out);
masm.loadConstantSimd128Int(allOnes, out);
masm.bitwiseXorX4(in, out);
return;
case MSimdUnaryArith::abs:
@ -3500,15 +3500,15 @@ CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins)
switch (ins->operation()) {
case MSimdUnaryArith::abs:
masm.loadConstantFloat32x4(signMasks, out);
masm.loadConstantSimd128Float(signMasks, out);
masm.bitwiseAndX4(in, out);
return;
case MSimdUnaryArith::neg:
masm.loadConstantFloat32x4(minusZero, out);
masm.loadConstantSimd128Float(minusZero, out);
masm.bitwiseXorX4(in, out);
return;
case MSimdUnaryArith::not_:
masm.loadConstantFloat32x4(allOnes, out);
masm.loadConstantSimd128Float(allOnes, out);
masm.bitwiseXorX4(in, out);
return;
case MSimdUnaryArith::reciprocalApproximation:

View File

@ -288,8 +288,8 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
void visitSimdValueInt32x4(LSimdValueInt32x4* lir);
void visitSimdValueFloat32x4(LSimdValueFloat32x4* lir);
void visitSimdSplatX4(LSimdSplatX4* lir);
void visitInt32x4(LInt32x4* ins);
void visitFloat32x4(LFloat32x4* ins);
void visitSimd128Int(LSimd128Int* ins);
void visitSimd128Float(LSimd128Float* ins);
void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins);
void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins);
void visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins);

View File

@ -1245,7 +1245,7 @@ class MacroAssemblerX86Shared : public Assembler
return false;
}
bool maybeInlineInt32x4(const SimdConstant& v, const FloatRegister& dest) {
bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
static const SimdConstant zero = SimdConstant::SplatX4(0);
static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
if (v == zero) {
@ -1258,7 +1258,7 @@ class MacroAssemblerX86Shared : public Assembler
}
return false;
}
bool maybeInlineFloat32x4(const SimdConstant& v, const FloatRegister& dest) {
bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
static const SimdConstant zero = SimdConstant::SplatX4(0.f);
if (v == zero) {
// This won't get inlined if the SimdConstant v contains -0 in any

View File

@ -115,29 +115,25 @@ MacroAssemblerX86::loadConstantFloat32(float f, FloatRegister dest)
}
void
MacroAssemblerX86::loadConstantInt32x4(const SimdConstant& v, FloatRegister dest)
MacroAssemblerX86::loadConstantSimd128Int(const SimdConstant& v, FloatRegister dest)
{
MOZ_ASSERT(v.type() == SimdConstant::Int32x4);
if (maybeInlineInt32x4(v, dest))
if (maybeInlineSimd128Int(v, dest))
return;
SimdData* i4 = getSimdData(v);
if (!i4)
return;
MOZ_ASSERT(i4->type() == SimdConstant::Int32x4);
masm.vmovdqa_mr(nullptr, dest.encoding());
propagateOOM(i4->uses.append(CodeOffset(masm.size())));
}
void
MacroAssemblerX86::loadConstantFloat32x4(const SimdConstant& v, FloatRegister dest)
MacroAssemblerX86::loadConstantSimd128Float(const SimdConstant& v, FloatRegister dest)
{
MOZ_ASSERT(v.type() == SimdConstant::Float32x4);
if (maybeInlineFloat32x4(v, dest))
if (maybeInlineSimd128Float(v, dest))
return;
SimdData* f4 = getSimdData(v);
if (!f4)
return;
MOZ_ASSERT(f4->type() == SimdConstant::Float32x4);
masm.vmovaps_mr(nullptr, dest.encoding());
propagateOOM(f4->uses.append(CodeOffset(masm.size())));
}

View File

@ -785,8 +785,8 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
void loadConstantDouble(double d, FloatRegister dest);
void loadConstantFloat32(float f, FloatRegister dest);
void loadConstantInt32x4(const SimdConstant& v, FloatRegister dest);
void loadConstantFloat32x4(const SimdConstant& v, FloatRegister dest);
void loadConstantSimd128Int(const SimdConstant& v, FloatRegister dest);
void loadConstantSimd128Float(const SimdConstant& v, FloatRegister dest);
Condition testInt32Truthy(bool truthy, const ValueOperand& operand) {
test32(operand.payloadReg(), operand.payloadReg());