mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-25 11:15:34 +00:00
Bug 1136226 - Make MSimdSwizzle and MSimdShuffle length-agnostic. r=bbouvier
Constructors and factories take lane lists as arrays instead of four separate lane arguments.
This commit is contained in:
parent
3da4ad1117
commit
5824c75e03
@ -493,8 +493,8 @@ class MOZ_STACK_CLASS ExprIter : private Policy
|
||||
MOZ_MUST_USE bool readReplaceLane(ValType simdType, uint8_t* lane,
|
||||
Value* vector, Value* scalar);
|
||||
MOZ_MUST_USE bool readSplat(ValType simdType, Value* scalar);
|
||||
MOZ_MUST_USE bool readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vector);
|
||||
MOZ_MUST_USE bool readShuffle(ValType simdType, uint8_t (* lanes)[4],
|
||||
MOZ_MUST_USE bool readSwizzle(ValType simdType, uint8_t (* lanes)[16], Value* vector);
|
||||
MOZ_MUST_USE bool readShuffle(ValType simdType, uint8_t (* lanes)[16],
|
||||
Value* lhs, Value* rhs);
|
||||
MOZ_MUST_USE bool readSimdSelect(ValType simdType, Value* trueValue,
|
||||
Value* falseValue,
|
||||
@ -1652,7 +1652,7 @@ ExprIter<Policy>::readSplat(ValType simdType, Value* scalar)
|
||||
|
||||
template <typename Policy>
|
||||
inline bool
|
||||
ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vector)
|
||||
ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[16], Value* vector)
|
||||
{
|
||||
MOZ_ASSERT(Classify(expr_) == ExprKind::Swizzle);
|
||||
|
||||
@ -1676,7 +1676,7 @@ ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vec
|
||||
|
||||
template <typename Policy>
|
||||
inline bool
|
||||
ExprIter<Policy>::readShuffle(ValType simdType, uint8_t (* lanes)[4], Value* lhs, Value* rhs)
|
||||
ExprIter<Policy>::readShuffle(ValType simdType, uint8_t (* lanes)[16], Value* lhs, Value* rhs)
|
||||
{
|
||||
MOZ_ASSERT(Classify(expr_) == ExprKind::Shuffle);
|
||||
|
||||
|
@ -340,26 +340,25 @@ class FunctionCompiler
|
||||
return ins;
|
||||
}
|
||||
|
||||
MDefinition* swizzleSimd(MDefinition* vector, int32_t X, int32_t Y, int32_t Z, int32_t W,
|
||||
MIRType type)
|
||||
MDefinition* swizzleSimd(MDefinition* vector, const uint8_t lanes[], MIRType type)
|
||||
{
|
||||
if (inDeadCode())
|
||||
return nullptr;
|
||||
|
||||
MOZ_ASSERT(vector->type() == type);
|
||||
MSimdSwizzle* ins = MSimdSwizzle::New(alloc(), vector, X, Y, Z, W);
|
||||
MSimdSwizzle* ins = MSimdSwizzle::New(alloc(), vector, lanes);
|
||||
curBlock_->add(ins);
|
||||
return ins;
|
||||
}
|
||||
|
||||
MDefinition* shuffleSimd(MDefinition* lhs, MDefinition* rhs, int32_t X, int32_t Y,
|
||||
int32_t Z, int32_t W, MIRType type)
|
||||
MDefinition* shuffleSimd(MDefinition* lhs, MDefinition* rhs, const uint8_t lanes[],
|
||||
MIRType type)
|
||||
{
|
||||
if (inDeadCode())
|
||||
return nullptr;
|
||||
|
||||
MOZ_ASSERT(lhs->type() == type);
|
||||
MInstruction* ins = MSimdShuffle::New(alloc(), lhs, rhs, X, Y, Z, W);
|
||||
MInstruction* ins = MSimdShuffle::New(alloc(), lhs, rhs, lanes);
|
||||
curBlock_->add(ins);
|
||||
return ins;
|
||||
}
|
||||
@ -2433,27 +2432,25 @@ EmitSimdConvert(FunctionCompiler& f, ValType fromType, ValType toType, SimdSign
|
||||
static bool
|
||||
EmitSimdSwizzle(FunctionCompiler& f, ValType simdType)
|
||||
{
|
||||
uint8_t lanes[4];
|
||||
uint8_t lanes[16];
|
||||
MDefinition* vector;
|
||||
if (!f.iter().readSwizzle(simdType, &lanes, &vector))
|
||||
return false;
|
||||
|
||||
f.iter().setResult(f.swizzleSimd(vector, lanes[0], lanes[1], lanes[2], lanes[3],
|
||||
ToMIRType(simdType)));
|
||||
f.iter().setResult(f.swizzleSimd(vector, lanes, ToMIRType(simdType)));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
EmitSimdShuffle(FunctionCompiler& f, ValType simdType)
|
||||
{
|
||||
uint8_t lanes[4];
|
||||
uint8_t lanes[16];
|
||||
MDefinition* lhs;
|
||||
MDefinition* rhs;
|
||||
if (!f.iter().readShuffle(simdType, &lanes, &lhs, &rhs))
|
||||
return false;
|
||||
|
||||
f.iter().setResult(f.shuffleSimd(lhs, rhs, lanes[0], lanes[1], lanes[2], lanes[3],
|
||||
ToMIRType(simdType)));
|
||||
f.iter().setResult(f.shuffleSimd(lhs, rhs, lanes, ToMIRType(simdType)));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,10 @@ class FixedList
|
||||
return list_[index];
|
||||
}
|
||||
|
||||
T* data() {
|
||||
return list_;
|
||||
}
|
||||
|
||||
T* begin() {
|
||||
return list_;
|
||||
}
|
||||
|
@ -4508,9 +4508,9 @@ LIRGenerator::visitSimdShuffle(MSimdShuffle* ins)
|
||||
MOZ_ASSERT(IsSimdType(ins->type()));
|
||||
MOZ_ASSERT(ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4);
|
||||
|
||||
bool zFromLHS = ins->laneZ() < 4;
|
||||
bool wFromLHS = ins->laneW() < 4;
|
||||
uint32_t lanesFromLHS = (ins->laneX() < 4) + (ins->laneY() < 4) + zFromLHS + wFromLHS;
|
||||
bool zFromLHS = ins->lane(2) < 4;
|
||||
bool wFromLHS = ins->lane(3) < 4;
|
||||
uint32_t lanesFromLHS = (ins->lane(0) < 4) + (ins->lane(1) < 4) + zFromLHS + wFromLHS;
|
||||
|
||||
LSimdShuffle* lir = new (alloc()) LSimdShuffle();
|
||||
lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
|
||||
|
@ -1191,7 +1191,7 @@ MSimdSwizzle::foldsTo(TempAllocator& alloc)
|
||||
MDefinition*
|
||||
MSimdGeneralShuffle::foldsTo(TempAllocator& alloc)
|
||||
{
|
||||
FixedList<uint32_t> lanes;
|
||||
FixedList<uint8_t> lanes;
|
||||
if (!lanes.init(alloc, numLanes()))
|
||||
return this;
|
||||
|
||||
@ -1199,16 +1199,16 @@ MSimdGeneralShuffle::foldsTo(TempAllocator& alloc)
|
||||
if (!lane(i)->isConstant() || lane(i)->type() != MIRType::Int32)
|
||||
return this;
|
||||
int32_t temp = lane(i)->toConstant()->toInt32();
|
||||
if (temp < 0 || uint32_t(temp) >= numLanes() * numVectors())
|
||||
if (temp < 0 || unsigned(temp) >= numLanes() * numVectors())
|
||||
return this;
|
||||
lanes[i] = uint32_t(temp);
|
||||
lanes[i] = uint8_t(temp);
|
||||
}
|
||||
|
||||
if (numVectors() == 1)
|
||||
return MSimdSwizzle::New(alloc, vector(0), lanes[0], lanes[1], lanes[2], lanes[3]);
|
||||
return MSimdSwizzle::New(alloc, vector(0), lanes.data());
|
||||
|
||||
MOZ_ASSERT(numVectors() == 2);
|
||||
return MSimdShuffle::New(alloc, vector(0), vector(1), lanes[0], lanes[1], lanes[2], lanes[3]);
|
||||
return MSimdShuffle::New(alloc, vector(0), vector(1), lanes.data());
|
||||
}
|
||||
|
||||
MInstruction*
|
||||
|
@ -1963,51 +1963,49 @@ class MSimdAnyTrue
|
||||
class MSimdShuffleBase
|
||||
{
|
||||
protected:
|
||||
// As of now, there are at most 4 lanes. For each lane, we need to know
|
||||
// which input we choose and which of the 4 lanes we choose; that can be
|
||||
// packed in 3 bits for each lane, so 12 bits in total.
|
||||
uint32_t laneMask_;
|
||||
// As of now, there are at most 16 lanes. For each lane, we need to know
|
||||
// which input we choose and which of the lanes we choose.
|
||||
mozilla::Array<uint8_t, 16> lane_;
|
||||
uint32_t arity_;
|
||||
|
||||
MSimdShuffleBase(uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW, MIRType type)
|
||||
MSimdShuffleBase(const uint8_t lanes[], MIRType type)
|
||||
{
|
||||
MOZ_ASSERT(SimdTypeToLength(type) == 4);
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
laneMask_ = (laneX << 0) | (laneY << 3) | (laneZ << 6) | (laneW << 9);
|
||||
arity_ = 4;
|
||||
arity_ = SimdTypeToLength(type);
|
||||
for (unsigned i = 0; i < arity_; i++)
|
||||
lane_[i] = lanes[i];
|
||||
}
|
||||
|
||||
bool sameLanes(const MSimdShuffleBase* other) const {
|
||||
return laneMask_ == other->laneMask_;
|
||||
return arity_ == other->arity_ &&
|
||||
memcmp(&lane_[0], &other->lane_[0], arity_) == 0;
|
||||
}
|
||||
|
||||
public:
|
||||
// For now, these formulas are fine for x4 types. They'll need to be
|
||||
// generalized for other SIMD type lengths.
|
||||
uint32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
|
||||
uint32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
|
||||
uint32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
|
||||
uint32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
|
||||
unsigned lane(unsigned i) const {
|
||||
MOZ_ASSERT(i < arity_);
|
||||
return lane_[i];
|
||||
}
|
||||
|
||||
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return ((x << 0) | (y << 3) | (z << 6) | (w << 9)) == laneMask_;
|
||||
return arity_ == 4 && lane(0) == x && lane(1) == y && lane(2) == z &&
|
||||
lane(3) == w;
|
||||
}
|
||||
};
|
||||
|
||||
// Applies a shuffle operation to the input, putting the input lanes as
|
||||
// Applies a swizzle operation to the input, putting the input lanes as
|
||||
// indicated in the output register's lanes. This implements the SIMD.js
|
||||
// "shuffle" function, that takes one vector and one mask.
|
||||
// "swizzle" function, that takes one vector and an array of lane indexes.
|
||||
class MSimdSwizzle
|
||||
: public MUnaryInstruction,
|
||||
public MSimdShuffleBase,
|
||||
public NoTypePolicy::Data
|
||||
{
|
||||
protected:
|
||||
MSimdSwizzle(MDefinition* obj,
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
: MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, obj->type())
|
||||
MSimdSwizzle(MDefinition* obj, const uint8_t lanes[])
|
||||
: MUnaryInstruction(obj), MSimdShuffleBase(lanes, obj->type())
|
||||
{
|
||||
MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
|
||||
for (unsigned i = 0; i < arity_; i++)
|
||||
MOZ_ASSERT(lane(i) < arity_);
|
||||
setResultType(obj->type());
|
||||
setMovable();
|
||||
}
|
||||
@ -2015,10 +2013,9 @@ class MSimdSwizzle
|
||||
public:
|
||||
INSTRUCTION_HEADER(SimdSwizzle)
|
||||
|
||||
static MSimdSwizzle* New(TempAllocator& alloc, MDefinition* obj,
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
static MSimdSwizzle* New(TempAllocator& alloc, MDefinition* obj, const uint8_t lanes[])
|
||||
{
|
||||
return new(alloc) MSimdSwizzle(obj, laneX, laneY, laneZ, laneW);
|
||||
return new(alloc) MSimdSwizzle(obj, lanes);
|
||||
}
|
||||
|
||||
bool congruentTo(const MDefinition* ins) const override {
|
||||
@ -2037,7 +2034,7 @@ class MSimdSwizzle
|
||||
ALLOW_CLONE(MSimdSwizzle)
|
||||
};
|
||||
|
||||
// A "general swizzle" is a swizzle or a shuffle with non-constant lane
|
||||
// A "general shuffle" is a swizzle or a shuffle with non-constant lane
|
||||
// indices. This is the one that Ion inlines and it can be folded into a
|
||||
// MSimdSwizzle/MSimdShuffle if lane indices are constant. Performance of
|
||||
// general swizzle/shuffle does not really matter, as we expect to get
|
||||
@ -2114,22 +2111,21 @@ class MSimdGeneralShuffle :
|
||||
}
|
||||
};
|
||||
|
||||
// Applies a shuffle operation to the inputs, selecting the 2 first lanes of the
|
||||
// output from lanes of the first input, and the 2 last lanes of the output from
|
||||
// lanes of the second input.
|
||||
// Applies a shuffle operation to the inputs. The lane indexes select a source
|
||||
// lane from the concatenation of the two input vectors.
|
||||
class MSimdShuffle
|
||||
: public MBinaryInstruction,
|
||||
public MSimdShuffleBase,
|
||||
public NoTypePolicy::Data
|
||||
{
|
||||
MSimdShuffle(MDefinition* lhs, MDefinition* rhs,
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
: MBinaryInstruction(lhs, rhs), MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
|
||||
MSimdShuffle(MDefinition* lhs, MDefinition* rhs, const uint8_t lanes[])
|
||||
: MBinaryInstruction(lhs, rhs), MSimdShuffleBase(lanes, lhs->type())
|
||||
{
|
||||
MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);
|
||||
MOZ_ASSERT(IsSimdType(lhs->type()));
|
||||
MOZ_ASSERT(IsSimdType(rhs->type()));
|
||||
MOZ_ASSERT(lhs->type() == rhs->type());
|
||||
for (unsigned i = 0; i < arity_; i++)
|
||||
MOZ_ASSERT(lane(i) < 2 * arity_);
|
||||
setResultType(lhs->type());
|
||||
setMovable();
|
||||
}
|
||||
@ -2138,25 +2134,32 @@ class MSimdShuffle
|
||||
INSTRUCTION_HEADER(SimdShuffle)
|
||||
|
||||
static MInstruction* New(TempAllocator& alloc, MDefinition* lhs, MDefinition* rhs,
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
const uint8_t lanes[])
|
||||
{
|
||||
unsigned arity = SimdTypeToLength(lhs->type());
|
||||
|
||||
// Swap operands so that new lanes come from LHS in majority.
|
||||
// In the balanced case, swap operands if needs be, in order to be able
|
||||
// to do only one vshufps on x86.
|
||||
unsigned lanesFromLHS = (laneX < 4) + (laneY < 4) + (laneZ < 4) + (laneW < 4);
|
||||
if (lanesFromLHS < 2 || (lanesFromLHS == 2 && laneX >= 4 && laneY >=4)) {
|
||||
laneX = (laneX + 4) % 8;
|
||||
laneY = (laneY + 4) % 8;
|
||||
laneZ = (laneZ + 4) % 8;
|
||||
laneW = (laneW + 4) % 8;
|
||||
mozilla::Swap(lhs, rhs);
|
||||
unsigned lanesFromLHS = 0;
|
||||
for (unsigned i = 0; i < arity; i++) {
|
||||
if (lanes[i] < arity)
|
||||
lanesFromLHS++;
|
||||
}
|
||||
|
||||
if (lanesFromLHS < arity / 2 ||
|
||||
(arity == 4 && lanesFromLHS == 2 && lanes[0] >= 4 && lanes[1] >= 4)) {
|
||||
mozilla::Array<uint8_t, 16> newLanes;
|
||||
for (unsigned i = 0; i < arity; i++)
|
||||
newLanes[i] = (lanes[i] + arity) % (2 * arity);
|
||||
return New(alloc, rhs, lhs, &newLanes[0]);
|
||||
}
|
||||
|
||||
// If all lanes come from the same vector, just use swizzle instead.
|
||||
if (laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4)
|
||||
return MSimdSwizzle::New(alloc, lhs, laneX, laneY, laneZ, laneW);
|
||||
if (lanesFromLHS == arity)
|
||||
return MSimdSwizzle::New(alloc, lhs, lanes);
|
||||
|
||||
return new(alloc) MSimdShuffle(lhs, rhs, laneX, laneY, laneZ, laneW);
|
||||
return new(alloc) MSimdShuffle(lhs, rhs, lanes);
|
||||
}
|
||||
|
||||
bool congruentTo(const MDefinition* ins) const override {
|
||||
|
@ -344,10 +344,7 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
|
||||
return getOperand(0);
|
||||
}
|
||||
|
||||
uint32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
|
||||
uint32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
|
||||
uint32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
|
||||
uint32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
|
||||
uint32_t lane(unsigned i) const { return mir_->toSimdSwizzle()->lane(i); }
|
||||
|
||||
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return mir_->toSimdSwizzle()->lanesMatch(x, y, z, w);
|
||||
@ -429,10 +426,7 @@ class LSimdShuffle : public LInstructionHelper<1, 2, 1>
|
||||
return getTemp(0);
|
||||
}
|
||||
|
||||
uint32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
|
||||
uint32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
|
||||
uint32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
|
||||
uint32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
|
||||
uint32_t lane(unsigned i) const { return mir_->toSimdShuffle()->lane(i); }
|
||||
|
||||
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return mir_->toSimdShuffle()->lanesMatch(x, y, z, w);
|
||||
|
@ -2901,10 +2901,10 @@ CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins)
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
|
||||
uint32_t x = ins->laneX();
|
||||
uint32_t y = ins->laneY();
|
||||
uint32_t z = ins->laneZ();
|
||||
uint32_t w = ins->laneW();
|
||||
uint32_t x = ins->lane(0);
|
||||
uint32_t y = ins->lane(1);
|
||||
uint32_t z = ins->lane(2);
|
||||
uint32_t w = ins->lane(3);
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
|
||||
masm.shuffleInt32(mask, input, output);
|
||||
@ -2916,10 +2916,10 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins)
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
|
||||
uint32_t x = ins->laneX();
|
||||
uint32_t y = ins->laneY();
|
||||
uint32_t z = ins->laneZ();
|
||||
uint32_t w = ins->laneW();
|
||||
uint32_t x = ins->lane(0);
|
||||
uint32_t y = ins->lane(1);
|
||||
uint32_t z = ins->lane(2);
|
||||
uint32_t w = ins->lane(3);
|
||||
|
||||
if (AssemblerX86Shared::HasSSE3()) {
|
||||
if (ins->lanesMatch(0, 0, 2, 2)) {
|
||||
@ -2973,10 +2973,10 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins)
|
||||
Operand rhs = ToOperand(ins->rhs());
|
||||
FloatRegister out = ToFloatRegister(ins->output());
|
||||
|
||||
uint32_t x = ins->laneX();
|
||||
uint32_t y = ins->laneY();
|
||||
uint32_t z = ins->laneZ();
|
||||
uint32_t w = ins->laneW();
|
||||
uint32_t x = ins->lane(0);
|
||||
uint32_t y = ins->lane(1);
|
||||
uint32_t z = ins->lane(2);
|
||||
uint32_t w = ins->lane(3);
|
||||
|
||||
// Check that lanes come from LHS in majority:
|
||||
unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
|
||||
|
Loading…
Reference in New Issue
Block a user