mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-13 21:35:39 +00:00
Bug 1021716: SIMD x86-x64: Implement MSimdShuffleMix; r=sunfish
This commit is contained in:
parent
39ac3875bc
commit
b2b91caa03
@ -242,10 +242,10 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
|
||||
return getOperand(0);
|
||||
}
|
||||
|
||||
SimdLane laneX() const { return mir_->toSimdSwizzle()->laneX(); }
|
||||
SimdLane laneY() const { return mir_->toSimdSwizzle()->laneY(); }
|
||||
SimdLane laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
|
||||
SimdLane laneW() const { return mir_->toSimdSwizzle()->laneW(); }
|
||||
int32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
|
||||
int32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
|
||||
int32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
|
||||
int32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
|
||||
};
|
||||
|
||||
// Shuffles an int32x4 into another int32x4 vector.
|
||||
@ -265,6 +265,27 @@ class LSimdSwizzleF : public LSimdSwizzleBase
|
||||
{}
|
||||
};
|
||||
|
||||
// Base class for both int32x4 and float32x4 shuffle instructions.
|
||||
class LSimdShuffle : public LInstructionHelper<1, 2, 0>
|
||||
{
|
||||
public:
|
||||
LIR_HEADER(SimdShuffle);
|
||||
LSimdShuffle()
|
||||
{}
|
||||
|
||||
const LAllocation *lhs() {
|
||||
return getOperand(0);
|
||||
}
|
||||
const LAllocation *rhs() {
|
||||
return getOperand(1);
|
||||
}
|
||||
|
||||
int32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
|
||||
int32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
|
||||
int32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
|
||||
int32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
|
||||
};
|
||||
|
||||
// Binary SIMD comparison operation between two SIMD operands
|
||||
class LSimdBinaryComp: public LInstructionHelper<1, 2, 0>
|
||||
{
|
||||
|
@ -26,6 +26,7 @@
|
||||
_(SimdSignMaskX4) \
|
||||
_(SimdSwizzleI) \
|
||||
_(SimdSwizzleF) \
|
||||
_(SimdShuffle) \
|
||||
_(SimdUnaryArithIx4) \
|
||||
_(SimdUnaryArithFx4) \
|
||||
_(SimdBinaryCompIx4) \
|
||||
|
@ -3830,6 +3830,24 @@ LIRGenerator::visitSimdSwizzle(MSimdSwizzle *ins)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
LIRGenerator::visitSimdShuffle(MSimdShuffle *ins)
|
||||
{
|
||||
MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
|
||||
MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
|
||||
MOZ_ASSERT(IsSimdType(ins->type()));
|
||||
|
||||
if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
|
||||
MDefinition *lhs = ins->lhs();
|
||||
MDefinition *rhs = ins->rhs();
|
||||
LSimdShuffle *lir = new (alloc()) LSimdShuffle;
|
||||
return lowerForFPU(lir, ins, lhs, rhs);
|
||||
}
|
||||
|
||||
MOZ_CRASH("Unknown SIMD kind when getting lane");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
LIRGenerator::visitSimdUnaryArith(MSimdUnaryArith *ins)
|
||||
{
|
||||
|
@ -273,6 +273,7 @@ class LIRGenerator : public LIRGeneratorSpecific
|
||||
bool visitSimdInsertElement(MSimdInsertElement *ins);
|
||||
bool visitSimdSignMask(MSimdSignMask *ins);
|
||||
bool visitSimdSwizzle(MSimdSwizzle *ins);
|
||||
bool visitSimdShuffle(MSimdShuffle *ins);
|
||||
bool visitSimdUnaryArith(MSimdUnaryArith *ins);
|
||||
bool visitSimdBinaryComp(MSimdBinaryComp *ins);
|
||||
bool visitSimdBinaryArith(MSimdBinaryArith *ins);
|
||||
|
121
js/src/jit/MIR.h
121
js/src/jit/MIR.h
@ -1575,34 +1575,51 @@ class MSimdSignMask : public MUnaryInstruction
|
||||
ALLOW_CLONE(MSimdSignMask)
|
||||
};
|
||||
|
||||
// Base for the MSimdSwizzle and MSimdShuffle classes.
|
||||
class MSimdShuffleBase
|
||||
{
|
||||
protected:
|
||||
// As of now, there are at most 4 lanes. For each lane, we need to know
|
||||
// which input we choose and which of the 4 lanes we choose; that can be
|
||||
// packed in 3 bits for each lane, so 12 bits in total.
|
||||
uint32_t laneMask_;
|
||||
uint32_t arity_;
|
||||
|
||||
MSimdShuffleBase(int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW, MIRType type)
|
||||
{
|
||||
MOZ_ASSERT(SimdTypeToLength(type) == 4);
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
laneMask_ = (laneX << 0) | (laneY << 3) | (laneZ << 6) | (laneW << 9);
|
||||
arity_ = 4;
|
||||
}
|
||||
|
||||
bool sameLanes(const MSimdShuffleBase *other) const {
|
||||
return laneMask_ == other->laneMask_;
|
||||
}
|
||||
|
||||
public:
|
||||
// For now, these formulas are fine for x4 types. They'll need to be
|
||||
// generalized for other SIMD type lengths.
|
||||
int32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
|
||||
int32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
|
||||
int32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
|
||||
int32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
|
||||
};
|
||||
|
||||
// Applies a shuffle operation to the input, putting the input lanes as
|
||||
// indicated in the output register's lanes. This implements the SIMD.js
|
||||
// "shuffle" function, that takes one vector and one mask.
|
||||
class MSimdSwizzle : public MUnaryInstruction
|
||||
class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
|
||||
{
|
||||
protected:
|
||||
// As of now, there are at most 4 lanes.
|
||||
SimdLane laneX_;
|
||||
SimdLane laneY_;
|
||||
SimdLane laneZ_;
|
||||
SimdLane laneW_;
|
||||
|
||||
MSimdSwizzle(MDefinition *obj, MIRType type,
|
||||
SimdLane laneX, SimdLane laneY, SimdLane laneZ, SimdLane laneW)
|
||||
: MUnaryInstruction(obj),
|
||||
laneX_(laneX), laneY_(laneY), laneZ_(laneZ), laneW_(laneW)
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
: MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, type)
|
||||
{
|
||||
MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
|
||||
MOZ_ASSERT(IsSimdType(obj->type()));
|
||||
// Returned value needs to be in a vector too
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
MOZ_ASSERT(SimdTypeToScalarType(obj->type()) == type);
|
||||
|
||||
mozilla::DebugOnly<uint32_t> expectedLength = SimdTypeToLength(obj->type());
|
||||
MOZ_ASSERT(uint32_t(laneX_) < expectedLength);
|
||||
MOZ_ASSERT(uint32_t(laneY_) < expectedLength);
|
||||
MOZ_ASSERT(uint32_t(laneZ_) < expectedLength);
|
||||
MOZ_ASSERT(uint32_t(laneW_) < expectedLength);
|
||||
|
||||
MOZ_ASSERT(obj->type() == type);
|
||||
setResultType(type);
|
||||
setMovable();
|
||||
}
|
||||
@ -1611,36 +1628,68 @@ class MSimdSwizzle : public MUnaryInstruction
|
||||
INSTRUCTION_HEADER(SimdSwizzle);
|
||||
|
||||
static MSimdSwizzle *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
|
||||
SimdLane laneX, SimdLane laneY, SimdLane laneZ, SimdLane laneW)
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
{
|
||||
return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
|
||||
}
|
||||
|
||||
SimdLane laneX() const { return laneX_; }
|
||||
SimdLane laneY() const { return laneY_; }
|
||||
SimdLane laneZ() const { return laneZ_; }
|
||||
SimdLane laneW() const { return laneW_; }
|
||||
|
||||
AliasSet getAliasSet() const {
|
||||
return AliasSet::None();
|
||||
}
|
||||
bool congruentTo(const MDefinition *ins) const {
|
||||
if (!ins->isSimdSwizzle())
|
||||
return false;
|
||||
const MSimdSwizzle *other = ins->toSimdSwizzle();
|
||||
if (other->laneX_ != laneX_ ||
|
||||
other->laneY_ != laneY_ ||
|
||||
other->laneZ_ != laneZ_ ||
|
||||
other->laneW_ != laneW_)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return congruentIfOperandsEqual(other);
|
||||
return sameLanes(other) && congruentIfOperandsEqual(other);
|
||||
}
|
||||
|
||||
AliasSet getAliasSet() const {
|
||||
return AliasSet::None();
|
||||
}
|
||||
|
||||
ALLOW_CLONE(MSimdSwizzle)
|
||||
};
|
||||
|
||||
// Applies a shuffle operation to the inputs, selecting the 2 first lanes of the
|
||||
// output from lanes of the first input, and the 2 last lanes of the output from
|
||||
// lanes of the second input.
|
||||
class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
|
||||
{
|
||||
MSimdShuffle(MDefinition *lhs, MDefinition *rhs, MIRType type,
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
: MBinaryInstruction(lhs, rhs), MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
|
||||
{
|
||||
MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);
|
||||
MOZ_ASSERT(IsSimdType(lhs->type()));
|
||||
MOZ_ASSERT(IsSimdType(rhs->type()));
|
||||
MOZ_ASSERT(lhs->type() == rhs->type());
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
MOZ_ASSERT(lhs->type() == type);
|
||||
setResultType(type);
|
||||
setMovable();
|
||||
}
|
||||
|
||||
public:
|
||||
INSTRUCTION_HEADER(SimdShuffle);
|
||||
|
||||
static MSimdShuffle *NewAsmJS(TempAllocator &alloc, MDefinition *lhs, MDefinition *rhs,
|
||||
MIRType type, int32_t laneX, int32_t laneY, int32_t laneZ,
|
||||
int32_t laneW)
|
||||
{
|
||||
return new(alloc) MSimdShuffle(lhs, rhs, type, laneX, laneY, laneZ, laneW);
|
||||
}
|
||||
|
||||
bool congruentTo(const MDefinition *ins) const {
|
||||
if (!ins->isSimdShuffle())
|
||||
return false;
|
||||
const MSimdShuffle *other = ins->toSimdShuffle();
|
||||
return sameLanes(other) && binaryCongruentTo(other);
|
||||
}
|
||||
|
||||
AliasSet getAliasSet() const {
|
||||
return AliasSet::None();
|
||||
}
|
||||
|
||||
ALLOW_CLONE(MSimdShuffle)
|
||||
};
|
||||
|
||||
class MSimdUnaryArith : public MUnaryInstruction
|
||||
{
|
||||
public:
|
||||
|
@ -21,6 +21,7 @@ namespace jit {
|
||||
_(SimdInsertElement) \
|
||||
_(SimdSignMask) \
|
||||
_(SimdSwizzle) \
|
||||
_(SimdShuffle) \
|
||||
_(SimdUnaryArith) \
|
||||
_(SimdBinaryComp) \
|
||||
_(SimdBinaryArith) \
|
||||
|
@ -120,6 +120,7 @@ class ParallelSafetyVisitor : public MDefinitionVisitor
|
||||
SAFE_OP(SimdInsertElement)
|
||||
SAFE_OP(SimdSignMask)
|
||||
SAFE_OP(SimdSwizzle)
|
||||
SAFE_OP(SimdShuffle)
|
||||
SAFE_OP(SimdUnaryArith)
|
||||
SAFE_OP(SimdBinaryComp)
|
||||
SAFE_OP(SimdBinaryArith)
|
||||
|
@ -1864,6 +1864,22 @@ class AssemblerX86Shared : public AssemblerShared
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.shufps_irr(mask, src.code(), dest.code());
|
||||
}
|
||||
void shufps(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.shufps_irr(mask, src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.shufps_imr(mask, src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.shufps_imr(mask, src.address(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void addsd(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.addsd_rr(src.code(), dest.code());
|
||||
|
@ -2940,7 +2940,7 @@ public:
|
||||
void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(mask < 256);
|
||||
spew("pshufd 0x%x, %s, %s",
|
||||
spew("pshufd 0x%x, %s, %s",
|
||||
mask, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)dst, (RegisterID)src);
|
||||
@ -2956,6 +2956,24 @@ public:
|
||||
m_formatter.immediate8(uint8_t(mask));
|
||||
}
|
||||
|
||||
void shufps_imr(uint32_t mask, int offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(mask < 256);
|
||||
spew("shufps 0x%x, %s0x%x(%s), %s",
|
||||
mask, PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)dst, base, offset);
|
||||
m_formatter.immediate8(uint8_t(mask));
|
||||
}
|
||||
|
||||
void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
|
||||
{
|
||||
spew("shufps %x, %p, %s",
|
||||
mask, address, nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_F3);
|
||||
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)dst, address);
|
||||
m_formatter.immediate8(uint8_t(mask));
|
||||
}
|
||||
|
||||
void movhlps_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("movhlps %s, %s",
|
||||
|
@ -2412,6 +2412,19 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
||||
{
|
||||
FloatRegister lhs = ToFloatRegister(ins->lhs());
|
||||
Operand rhs = ToOperand(ins->rhs());
|
||||
MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ() - 4,
|
||||
ins->laneW() - 4);
|
||||
masm.shuffleMix(mask, rhs, lhs);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *ins)
|
||||
{
|
||||
|
@ -221,6 +221,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
|
||||
bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
|
||||
bool visitSimdSwizzleI(LSimdSwizzleI *lir);
|
||||
bool visitSimdSwizzleF(LSimdSwizzleF *lir);
|
||||
bool visitSimdShuffle(LSimdShuffle *lir);
|
||||
bool visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *lir);
|
||||
bool visitSimdUnaryArithFx4(LSimdUnaryArithFx4 *lir);
|
||||
bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
|
||||
|
@ -595,13 +595,12 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
void packedDivFloat32(const Operand &src, FloatRegister dest) {
|
||||
divps(src, dest);
|
||||
}
|
||||
static uint32_t ComputeShuffleMask(SimdLane x, SimdLane y = LaneX,
|
||||
SimdLane z = LaneX, SimdLane w = LaneX)
|
||||
|
||||
static uint32_t ComputeShuffleMask(uint32_t x = LaneX, uint32_t y = LaneY,
|
||||
uint32_t z = LaneZ, uint32_t w = LaneW)
|
||||
{
|
||||
uint32_t r = (uint32_t(w) << 6) |
|
||||
(uint32_t(z) << 4) |
|
||||
(uint32_t(y) << 2) |
|
||||
uint32_t(x);
|
||||
MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
|
||||
uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
|
||||
MOZ_ASSERT(r < 256);
|
||||
return r;
|
||||
}
|
||||
@ -626,6 +625,11 @@ class MacroAssemblerX86Shared : public Assembler
|
||||
moveAlignedFloat32x4(src, dest);
|
||||
shufps(mask, dest, dest);
|
||||
}
|
||||
// Mixes lanes of |dest| and |src| into |dest| by forwarding |mask|, |src| and
// |dest| unchanged to shufps.
void shuffleMix(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
// Note this uses shufps, which incurs a cross-domain penalty on CPUs where
|
||||
// it applies, but that's the way clang and gcc do it.
|
||||
shufps(mask, src, dest);
|
||||
}
|
||||
|
||||
void moveFloatAsDouble(Register src, FloatRegister dest) {
|
||||
movd(src, dest);
|
||||
|
Loading…
Reference in New Issue
Block a user