Bug 1136226 - Make MSimdSwizzle and MSimdShuffle length-agnostic. r=bbouvier

Constructors and factories take lane lists as arrays instead of four separate lane arguments.
2024-10-25 11:15:34 +00:00 · 2016-05-09 16:48:31 -07:00 · 2016-05-09 16:48:31 -07:00 · 5824c75e03
commit 5824c75e03
parent 3da4ad1117
8 changed files with 87 additions and 89 deletions
--- a/js/src/asmjs/WasmBinaryIterator.h
+++ b/js/src/asmjs/WasmBinaryIterator.h
@ -493,8 +493,8 @@ class MOZ_STACK_CLASS ExprIter : private Policy
    MOZ_MUST_USE bool readReplaceLane(ValType simdType, uint8_t* lane,
                                      Value* vector, Value* scalar);
    MOZ_MUST_USE bool readSplat(ValType simdType, Value* scalar);
-    MOZ_MUST_USE bool readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vector);
-    MOZ_MUST_USE bool readShuffle(ValType simdType, uint8_t (* lanes)[4],
+    MOZ_MUST_USE bool readSwizzle(ValType simdType, uint8_t (* lanes)[16], Value* vector);
+    MOZ_MUST_USE bool readShuffle(ValType simdType, uint8_t (* lanes)[16],
                                  Value* lhs, Value* rhs);
    MOZ_MUST_USE bool readSimdSelect(ValType simdType, Value* trueValue,
                                     Value* falseValue,
@ -1652,7 +1652,7 @@ ExprIter<Policy>::readSplat(ValType simdType, Value* scalar)

 template <typename Policy>
 inline bool
-ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vector)
+ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[16], Value* vector)
 {
    MOZ_ASSERT(Classify(expr_) == ExprKind::Swizzle);

@ -1676,7 +1676,7 @@ ExprIter<Policy>::readSwizzle(ValType simdType, uint8_t (* lanes)[4], Value* vec

 template <typename Policy>
 inline bool
-ExprIter<Policy>::readShuffle(ValType simdType, uint8_t (* lanes)[4], Value* lhs, Value* rhs)
+ExprIter<Policy>::readShuffle(ValType simdType, uint8_t (* lanes)[16], Value* lhs, Value* rhs)
 {
    MOZ_ASSERT(Classify(expr_) == ExprKind::Shuffle);

--- a/js/src/asmjs/WasmIonCompile.cpp
+++ b/js/src/asmjs/WasmIonCompile.cpp
@ -340,26 +340,25 @@ class FunctionCompiler
        return ins;
    }

-    MDefinition* swizzleSimd(MDefinition* vector, int32_t X, int32_t Y, int32_t Z, int32_t W,
-                             MIRType type)
+    MDefinition* swizzleSimd(MDefinition* vector, const uint8_t lanes[], MIRType type)
    {
        if (inDeadCode())
            return nullptr;

        MOZ_ASSERT(vector->type() == type);
-        MSimdSwizzle* ins = MSimdSwizzle::New(alloc(), vector, X, Y, Z, W);
+        MSimdSwizzle* ins = MSimdSwizzle::New(alloc(), vector, lanes);
        curBlock_->add(ins);
        return ins;
    }

-    MDefinition* shuffleSimd(MDefinition* lhs, MDefinition* rhs, int32_t X, int32_t Y,
-                             int32_t Z, int32_t W, MIRType type)
+    MDefinition* shuffleSimd(MDefinition* lhs, MDefinition* rhs, const uint8_t lanes[],
+                             MIRType type)
    {
        if (inDeadCode())
            return nullptr;

        MOZ_ASSERT(lhs->type() == type);
-        MInstruction* ins = MSimdShuffle::New(alloc(), lhs, rhs, X, Y, Z, W);
+        MInstruction* ins = MSimdShuffle::New(alloc(), lhs, rhs, lanes);
        curBlock_->add(ins);
        return ins;
    }
@ -2433,27 +2432,25 @@ EmitSimdConvert(FunctionCompiler& f, ValType fromType, ValType toType, SimdSign
 static bool
 EmitSimdSwizzle(FunctionCompiler& f, ValType simdType)
 {
-    uint8_t lanes[4];
+    uint8_t lanes[16];
    MDefinition* vector;
    if (!f.iter().readSwizzle(simdType, &lanes, &vector))
        return false;

-    f.iter().setResult(f.swizzleSimd(vector, lanes[0], lanes[1], lanes[2], lanes[3],
-                                     ToMIRType(simdType)));
+    f.iter().setResult(f.swizzleSimd(vector, lanes, ToMIRType(simdType)));
    return true;
 }

 static bool
 EmitSimdShuffle(FunctionCompiler& f, ValType simdType)
 {
-    uint8_t lanes[4];
+    uint8_t lanes[16];
    MDefinition* lhs;
    MDefinition* rhs;
    if (!f.iter().readShuffle(simdType, &lanes, &lhs, &rhs))
        return false;

-    f.iter().setResult(f.shuffleSimd(lhs, rhs, lanes[0], lanes[1], lanes[2], lanes[3],
-                                     ToMIRType(simdType)));
+    f.iter().setResult(f.shuffleSimd(lhs, rhs, lanes, ToMIRType(simdType)));
    return true;
 }

--- a/js/src/jit/FixedList.h
+++ b/js/src/jit/FixedList.h
@ -85,6 +85,10 @@ class FixedList
        return list_[index];
    }

+    T* data() {
+        return list_;
+    }
+
    T* begin() {
        return list_;
    }
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@ -4508,9 +4508,9 @@ LIRGenerator::visitSimdShuffle(MSimdShuffle* ins)
    MOZ_ASSERT(IsSimdType(ins->type()));
    MOZ_ASSERT(ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4);

-    bool zFromLHS = ins->laneZ() < 4;
-    bool wFromLHS = ins->laneW() < 4;
-    uint32_t lanesFromLHS = (ins->laneX() < 4) + (ins->laneY() < 4) + zFromLHS + wFromLHS;
+    bool zFromLHS = ins->lane(2) < 4;
+    bool wFromLHS = ins->lane(3) < 4;
+    uint32_t lanesFromLHS = (ins->lane(0) < 4) + (ins->lane(1) < 4) + zFromLHS + wFromLHS;

    LSimdShuffle* lir = new (alloc()) LSimdShuffle();
    lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@ -1191,7 +1191,7 @@ MSimdSwizzle::foldsTo(TempAllocator& alloc)
 MDefinition*
 MSimdGeneralShuffle::foldsTo(TempAllocator& alloc)
 {
-    FixedList<uint32_t> lanes;
+    FixedList<uint8_t> lanes;
    if (!lanes.init(alloc, numLanes()))
        return this;

@ -1199,16 +1199,16 @@ MSimdGeneralShuffle::foldsTo(TempAllocator& alloc)
        if (!lane(i)->isConstant() || lane(i)->type() != MIRType::Int32)
            return this;
        int32_t temp = lane(i)->toConstant()->toInt32();
-        if (temp < 0 || uint32_t(temp) >= numLanes() * numVectors())
+        if (temp < 0 || unsigned(temp) >= numLanes() * numVectors())
            return this;
-        lanes[i] = uint32_t(temp);
+        lanes[i] = uint8_t(temp);
    }

    if (numVectors() == 1)
-        return MSimdSwizzle::New(alloc, vector(0), lanes[0], lanes[1], lanes[2], lanes[3]);
+        return MSimdSwizzle::New(alloc, vector(0), lanes.data());

    MOZ_ASSERT(numVectors() == 2);
-    return MSimdShuffle::New(alloc, vector(0), vector(1), lanes[0], lanes[1], lanes[2], lanes[3]);
+    return MSimdShuffle::New(alloc, vector(0), vector(1), lanes.data());
 }

 MInstruction*
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@ -1963,51 +1963,49 @@ class MSimdAnyTrue
 class MSimdShuffleBase
 {
  protected:
-    // As of now, there are at most 4 lanes. For each lane, we need to know
-    // which input we choose and which of the 4 lanes we choose; that can be
-    // packed in 3 bits for each lane, so 12 bits in total.
-    uint32_t laneMask_;
+    // As of now, there are at most 16 lanes. For each lane, we need to know
+    // which input we choose and which of the lanes we choose.
+    mozilla::Array<uint8_t, 16> lane_;
    uint32_t arity_;

-    MSimdShuffleBase(uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW, MIRType type)
+    MSimdShuffleBase(const uint8_t lanes[], MIRType type)
    {
-        MOZ_ASSERT(SimdTypeToLength(type) == 4);
-        MOZ_ASSERT(IsSimdType(type));
-        laneMask_ = (laneX << 0) | (laneY << 3) | (laneZ << 6) | (laneW << 9);
-        arity_ = 4;
+        arity_ = SimdTypeToLength(type);
+        for (unsigned i = 0; i < arity_; i++)
+            lane_[i] = lanes[i];
    }

    bool sameLanes(const MSimdShuffleBase* other) const {
-        return laneMask_ == other->laneMask_;
+        return arity_ == other->arity_ &&
+               memcmp(&lane_[0], &other->lane_[0], arity_) == 0;
    }

  public:
-    // For now, these formulas are fine for x4 types. They'll need to be
-    // generalized for other SIMD type lengths.
-    uint32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
-    uint32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
-    uint32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
-    uint32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
+    unsigned lane(unsigned i) const {
+        MOZ_ASSERT(i < arity_);
+        return lane_[i];
+    }

    bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
-        return ((x << 0) | (y << 3) | (z << 6) | (w << 9)) == laneMask_;
+        return arity_ == 4 && lane(0) == x && lane(1) == y && lane(2) == z &&
+               lane(3) == w;
    }
 };

-// Applies a shuffle operation to the input, putting the input lanes as
+// Applies a swizzle operation to the input, putting the input lanes as
 // indicated in the output register's lanes. This implements the SIMD.js
-// "shuffle" function, that takes one vector and one mask.
+// "swizzle" function, that takes one vector and an array of lane indexes.
 class MSimdSwizzle
  : public MUnaryInstruction,
    public MSimdShuffleBase,
    public NoTypePolicy::Data
 {
  protected:
-    MSimdSwizzle(MDefinition* obj,
-                 uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
-      : MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, obj->type())
+    MSimdSwizzle(MDefinition* obj, const uint8_t lanes[])
+      : MUnaryInstruction(obj), MSimdShuffleBase(lanes, obj->type())
    {
-        MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
+        for (unsigned i = 0; i < arity_; i++)
+            MOZ_ASSERT(lane(i) < arity_);
        setResultType(obj->type());
        setMovable();
    }
@ -2015,10 +2013,9 @@ class MSimdSwizzle
  public:
    INSTRUCTION_HEADER(SimdSwizzle)

-    static MSimdSwizzle* New(TempAllocator& alloc, MDefinition* obj,
-                             uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
+    static MSimdSwizzle* New(TempAllocator& alloc, MDefinition* obj, const uint8_t lanes[])
    {
-        return new(alloc) MSimdSwizzle(obj, laneX, laneY, laneZ, laneW);
+        return new(alloc) MSimdSwizzle(obj, lanes);
    }

    bool congruentTo(const MDefinition* ins) const override {
@ -2037,7 +2034,7 @@ class MSimdSwizzle
    ALLOW_CLONE(MSimdSwizzle)
 };

-// A "general swizzle" is a swizzle or a shuffle with non-constant lane
+// A "general shuffle" is a swizzle or a shuffle with non-constant lane
 // indices.  This is the one that Ion inlines and it can be folded into a
 // MSimdSwizzle/MSimdShuffle if lane indices are constant.  Performance of
 // general swizzle/shuffle does not really matter, as we expect to get
@ -2114,22 +2111,21 @@ class MSimdGeneralShuffle :
    }
 };

-// Applies a shuffle operation to the inputs, selecting the 2 first lanes of the
-// output from lanes of the first input, and the 2 last lanes of the output from
-// lanes of the second input.
+// Applies a shuffle operation to the inputs. The lane indexes select a source
+// lane from the concatenation of the two input vectors.
 class MSimdShuffle
  : public MBinaryInstruction,
    public MSimdShuffleBase,
    public NoTypePolicy::Data
 {
-    MSimdShuffle(MDefinition* lhs, MDefinition* rhs,
-                 uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
-      : MBinaryInstruction(lhs, rhs), MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
+    MSimdShuffle(MDefinition* lhs, MDefinition* rhs, const uint8_t lanes[])
+      : MBinaryInstruction(lhs, rhs), MSimdShuffleBase(lanes, lhs->type())
    {
-        MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);
        MOZ_ASSERT(IsSimdType(lhs->type()));
        MOZ_ASSERT(IsSimdType(rhs->type()));
        MOZ_ASSERT(lhs->type() == rhs->type());
+        for (unsigned i = 0; i < arity_; i++)
+            MOZ_ASSERT(lane(i) < 2 * arity_);
        setResultType(lhs->type());
        setMovable();
    }
@ -2138,25 +2134,32 @@ class MSimdShuffle
    INSTRUCTION_HEADER(SimdShuffle)

    static MInstruction* New(TempAllocator& alloc, MDefinition* lhs, MDefinition* rhs,
-                             uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
+                             const uint8_t lanes[])
    {
+        unsigned arity = SimdTypeToLength(lhs->type());
+
        // Swap operands so that new lanes come from LHS in majority.
        // In the balanced case, swap operands if needs be, in order to be able
        // to do only one vshufps on x86.
-        unsigned lanesFromLHS = (laneX < 4) + (laneY < 4) + (laneZ < 4) + (laneW < 4);
-        if (lanesFromLHS < 2 || (lanesFromLHS == 2 && laneX >= 4 && laneY >=4)) {
-            laneX = (laneX + 4) % 8;
-            laneY = (laneY + 4) % 8;
-            laneZ = (laneZ + 4) % 8;
-            laneW = (laneW + 4) % 8;
-            mozilla::Swap(lhs, rhs);
+        unsigned lanesFromLHS = 0;
+        for (unsigned i = 0; i < arity; i++) {
+            if (lanes[i] < arity)
+                lanesFromLHS++;
+        }
+
+        if (lanesFromLHS < arity / 2 ||
+            (arity == 4 && lanesFromLHS == 2 && lanes[0] >= 4 && lanes[1] >= 4)) {
+            mozilla::Array<uint8_t, 16> newLanes;
+            for (unsigned i = 0; i < arity; i++)
+                newLanes[i] = (lanes[i] + arity) % (2 * arity);
+            return New(alloc, rhs, lhs, &newLanes[0]);
        }

        // If all lanes come from the same vector, just use swizzle instead.
-        if (laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4)
-            return MSimdSwizzle::New(alloc, lhs, laneX, laneY, laneZ, laneW);
+        if (lanesFromLHS == arity)
+            return MSimdSwizzle::New(alloc, lhs, lanes);

-        return new(alloc) MSimdShuffle(lhs, rhs, laneX, laneY, laneZ, laneW);
+        return new(alloc) MSimdShuffle(lhs, rhs, lanes);
    }

    bool congruentTo(const MDefinition* ins) const override {
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@ -344,10 +344,7 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
        return getOperand(0);
    }

-    uint32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
-    uint32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
-    uint32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
-    uint32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
+    uint32_t lane(unsigned i) const { return mir_->toSimdSwizzle()->lane(i); }

    bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
        return mir_->toSimdSwizzle()->lanesMatch(x, y, z, w);
@ -429,10 +426,7 @@ class LSimdShuffle : public LInstructionHelper<1, 2, 1>
        return getTemp(0);
    }

-    uint32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
-    uint32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
-    uint32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
-    uint32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
+    uint32_t lane(unsigned i) const { return mir_->toSimdShuffle()->lane(i); }

    bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
        return mir_->toSimdShuffle()->lanesMatch(x, y, z, w);
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@ -2901,10 +2901,10 @@ CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins)
    FloatRegister input = ToFloatRegister(ins->input());
    FloatRegister output = ToFloatRegister(ins->output());

-    uint32_t x = ins->laneX();
-    uint32_t y = ins->laneY();
-    uint32_t z = ins->laneZ();
-    uint32_t w = ins->laneW();
+    uint32_t x = ins->lane(0);
+    uint32_t y = ins->lane(1);
+    uint32_t z = ins->lane(2);
+    uint32_t w = ins->lane(3);

    uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
    masm.shuffleInt32(mask, input, output);
@ -2916,10 +2916,10 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins)
    FloatRegister input = ToFloatRegister(ins->input());
    FloatRegister output = ToFloatRegister(ins->output());

-    uint32_t x = ins->laneX();
-    uint32_t y = ins->laneY();
-    uint32_t z = ins->laneZ();
-    uint32_t w = ins->laneW();
+    uint32_t x = ins->lane(0);
+    uint32_t y = ins->lane(1);
+    uint32_t z = ins->lane(2);
+    uint32_t w = ins->lane(3);

    if (AssemblerX86Shared::HasSSE3()) {
        if (ins->lanesMatch(0, 0, 2, 2)) {
@ -2973,10 +2973,10 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins)
    Operand rhs = ToOperand(ins->rhs());
    FloatRegister out = ToFloatRegister(ins->output());

-    uint32_t x = ins->laneX();
-    uint32_t y = ins->laneY();
-    uint32_t z = ins->laneZ();
-    uint32_t w = ins->laneW();
+    uint32_t x = ins->lane(0);
+    uint32_t y = ins->lane(1);
+    uint32_t z = ins->lane(2);
+    uint32_t w = ins->lane(3);

    // Check that lanes come from LHS in majority:
    unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);