Bug 1341265 - Part 7: Optimise Set.prototype.has for non-GC things in CacheIR. r=iain

Inline `Set.prototype.has` in CacheIR when called with non-GC things. We have to inline the following steps: 1. Guard the input is a non-GC thing. 2. Normalise the input, i.e. transform doubles to int32 when possible and canonicalise NaN values. 3. Hash the input through `mozilla::HashGeneric()`. 4. Perform the hash lookup. The hash lookup already uses templates and handles BigInts in preparation for the next parts. Differential Revision: https://phabricator.services.mozilla.com/D118973
2024-11-24 21:31:04 +00:00 · 2021-08-10 09:55:20 +00:00 · 2021-08-10 09:55:20 +00:00 · 829ce6f05d
commit 829ce6f05d
parent 5c18e1b238
8 changed files with 456 additions and 0 deletions
--- a/js/src/builtin/MapObject.h
+++ b/js/src/builtin/MapObject.h
@ -273,6 +273,10 @@ class SetObject : public NativeObject {

  size_t sizeOfData(mozilla::MallocSizeOf mallocSizeOf);

+  // Byte offset of the fixed slot with index |DataSlot|, so that code which
+  // addresses the object directly can load that slot.
+  static constexpr size_t getDataSlotOffset() {
+    return getFixedSlotOffset(DataSlot);
+  }
+
 private:
  static const ClassSpec classSpec_;
  static const JSClassOps classOps_;
--- a/js/src/ds/OrderedHashTable.h
+++ b/js/src/ds/OrderedHashTable.h
@ -597,12 +597,19 @@ class OrderedHashTable {
    return offsetof(OrderedHashTable, dataLength);
  }
  static size_t offsetOfData() { return offsetof(OrderedHashTable, data); }
+  // Byte offset of the |hashTable| member within OrderedHashTable.
+  static constexpr size_t offsetOfHashTable() {
+    return offsetof(OrderedHashTable, hashTable);
+  }
+  // Byte offset of the |hashShift| member within OrderedHashTable.
+  static constexpr size_t offsetOfHashShift() {
+    return offsetof(OrderedHashTable, hashShift);
+  }
  static constexpr size_t offsetOfDataElement() {
    static_assert(offsetof(Data, element) == 0,
                  "RangeFront and RangePopFront depend on offsetof(Data, "
                  "element) being 0");
    return offsetof(Data, element);
  }
+  // Byte offset of the |chain| member within a Data entry.
+  static constexpr size_t offsetOfDataChain() { return offsetof(Data, chain); }
  static constexpr size_t sizeofData() { return sizeof(Data); }

 private:
@ -901,9 +908,18 @@ class OrderedHashSet {
  static size_t offsetOfEntryKey() { return 0; }
  static size_t offsetOfImplDataLength() { return Impl::offsetOfDataLength(); }
  static size_t offsetOfImplData() { return Impl::offsetOfData(); }
+  // Forwards to Impl::offsetOfHashTable().
+  static constexpr size_t offsetOfImplHashTable() {
+    return Impl::offsetOfHashTable();
+  }
+  // Forwards to Impl::offsetOfHashShift().
+  static constexpr size_t offsetOfImplHashShift() {
+    return Impl::offsetOfHashShift();
+  }
  static constexpr size_t offsetOfImplDataElement() {
    return Impl::offsetOfDataElement();
  }
+  // Forwards to Impl::offsetOfDataChain().
+  static constexpr size_t offsetOfImplDataChain() {
+    return Impl::offsetOfDataChain();
+  }
  static constexpr size_t sizeofImplData() { return Impl::sizeofData(); }

  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
--- a/js/src/jit-test/tests/cacheir/set-has-nongcthing.js
+++ b/js/src/jit-test/tests/cacheir/set-has-nongcthing.js
@ -0,0 +1,110 @@
+// Return a new set, possibly filling some dummy entries to enforce creating
+// multiple hash buckets.
+//
+// |values| supplies the entries under test. |n| fresh object literals are
+// appended after them as padding before the Set is constructed.
+function createSet(values, n) {
+  var xs = [...values];
+  for (var i = 0; i < n; ++i) {
+    xs.push({});
+  }
+  return new Set(xs);
+}
+
+// Call |fn| with 0 and then with 100. The test functions in this file forward
+// that argument to createSet() as the number of padding entries.
+function runTest(fn) {
+  fn(0);
+  fn(100);
+}
+
+// Int32 keys: |xs| are in the set, |ys| are not. The loop cycles through all
+// four keys via |i & 3|, so exactly half of the N lookups must hit.
+function testInt32(n) {
+  var xs = [1, 2];
+  var ys = [3, 4];
+  var zs = [...xs, ...ys];
+  var set = createSet(xs, n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testInt32);
+
+// Double keys that are not int32-valued: |xs| are in the set, |ys| are not.
+// Half of the N lookups must hit.
+function testDouble(n) {
+  var xs = [Math.PI, Infinity];
+  var ys = [Math.E, -Infinity];
+  var zs = [...xs, ...ys];
+  var set = createSet(xs, n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testDouble);
+
+// Zero handling: the set is created from |[0]| only, but the lookups use both
+// 0 and -0. The expected count of N / 2 requires both of them to be found.
+function testZero(n) {
+  var xs = [0, -0];
+  var ys = [1, -1];
+  var zs = [...xs, ...ys];
+  var set = createSet([0], n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testZero);
+
+// NaN handling: the set is created from |[NaN]| only, but the lookups use both
+// NaN and -NaN. The expected count of N / 2 requires both of them to be found.
+function testNaN(n) {
+  var xs = [NaN, -NaN];
+  var ys = [1, -1];
+  var zs = [...xs, ...ys];
+  var set = createSet([NaN], n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testNaN);
+
+// |undefined| and |null| keys are in the set; the int32 keys in |ys| are not.
+// Half of the N lookups must hit.
+function testUndefinedAndNull(n) {
+  var xs = [undefined, null];
+  var ys = [1, -1];
+  var zs = [...xs, ...ys];
+  var set = createSet(xs, n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testUndefinedAndNull);
+
+// Boolean keys are in the set; the int32 keys in |ys| are not. Half of the N
+// lookups must hit.
+function testBoolean(n) {
+  var xs = [true, false];
+  var ys = [1, -1];
+  var zs = [...xs, ...ys];
+  var set = createSet(xs, n);
+
+  var N = 100;
+  var c = 0;
+  for (var i = 0; i < N; ++i) {
+    var z = zs[i & 3];
+    if (set.has(z)) c++;
+  }
+  assertEq(c, N / 2);
+}
+runTest(testBoolean);
--- a/js/src/jit/CacheIR.cpp
+++ b/js/src/jit/CacheIR.cpp
@ -7790,7 +7790,40 @@ AttachDecision CallIRGenerator::tryAttachSetHas(HandleFunction callee) {

  ValOperandId argId = writer.loadArgumentFixedSlot(ArgumentKind::Arg0, argc_);

+#ifndef JS_CODEGEN_X86
+  // Assume the hash key will likely always have the same type when attaching
+  // the first stub. If the call is polymorphic on the hash key, attach a stub
+  // which handles any value.
+  if (isFirstStub_) {
+    switch (args_[0].type()) {
+      case ValueType::Double:
+      case ValueType::Int32:
+      case ValueType::Boolean:
+      case ValueType::Undefined:
+      case ValueType::Null: {
+        writer.guardToNonGCThing(argId);
+        writer.setHasNonGCThingResult(objId, argId);
+        break;
+      }
+      case ValueType::String:
+      case ValueType::Symbol:
+      case ValueType::BigInt:
+      case ValueType::Object:
+        writer.setHasResult(objId, argId);
+        break;
+
+      case ValueType::Magic:
+      case ValueType::PrivateGCThing:
+        MOZ_CRASH("Unexpected type");
+    }
+  } else {
+    writer.setHasResult(objId, argId);
+  }
+#else
+  // The optimized versions require too many registers on x86.
  writer.setHasResult(objId, argId);
+#endif
+
  writer.returnFromIC();

  trackAttached("SetHas");
--- a/js/src/jit/CacheIRCompiler.cpp
+++ b/js/src/jit/CacheIRCompiler.cpp
@ -1645,6 +1645,20 @@ bool CacheIRCompiler::emitGuardToInt32(ValOperandId inputId) {
  return true;
 }

+// Emit a guard that jumps to the failure path when the input value tests as a
+// GC thing. Returns false only if the failure path could not be added.
+bool CacheIRCompiler::emitGuardToNonGCThing(ValOperandId inputId) {
+  JitSpew(JitSpew_Codegen, "%s", __FUNCTION__);
+
+  ValueOperand input = allocator.useValueRegister(masm, inputId);
+
+  FailurePath* failure;
+  if (!addFailurePath(&failure)) {
+    return false;
+  }
+
+  // GC things take the failure path; everything else falls through.
+  masm.branchTestGCThing(Assembler::Equal, input, failure->label());
+  return true;
+}
+
 // Infallible |emitDouble| emitters can use this implementation to avoid
 // generating extra clean-up instructions to restore the scratch float register.
 // To select this function simply omit the |Label* fail| parameter for the
@ -8142,6 +8156,29 @@ bool CacheIRCompiler::emitSetHasResult(ObjOperandId setId, ValOperandId valId) {
  return true;
 }

+// Emit an inline Set lookup for a key that is not a GC thing and store the
+// boolean result in the output register.
+//
+// Uses four general-purpose scratch registers and one float scratch register.
+bool CacheIRCompiler::emitSetHasNonGCThingResult(ObjOperandId setId,
+                                                 ValOperandId valId) {
+  JitSpew(JitSpew_Codegen, "%s", __FUNCTION__);
+
+  AutoOutputRegister output(*this);
+  Register set = allocator.useRegister(masm, setId);
+  ValueOperand val = allocator.useValueRegister(masm, valId);
+
+  AutoScratchRegister scratch1(allocator, masm);
+  AutoScratchRegister scratch2(allocator, masm);
+  AutoScratchRegister scratch3(allocator, masm);
+  AutoScratchRegister scratch4(allocator, masm);
+  AutoAvailableFloatRegister scratchFloat(*this, FloatReg0);
+
+  // Normalise the key into the output register, then hash it: scratch1
+  // receives the hash and scratch2 is only a temporary at this point.
+  masm.toHashableNonGCThing(val, output.valueReg(), scratchFloat);
+  masm.prepareHashNonGCThing(output.valueReg(), scratch1, scratch2);
+
+  // Look up the normalised key. scratch1 is the hash, scratch2 receives the
+  // 0/1 result, and scratch3/scratch4 are temporaries.
+  masm.setObjectHasNonBigInt(set, output.valueReg(), scratch1, scratch2,
+                             scratch3, scratch4);
+  // Box the 0/1 result as a boolean, overwriting the normalised key.
+  masm.tagValue(JSVAL_TYPE_BOOLEAN, scratch2, output.valueReg());
+  return true;
+}
+
 bool CacheIRCompiler::emitBailout() {
  JitSpew(JitSpew_Codegen, "%s", __FUNCTION__);

--- a/js/src/jit/CacheIROps.yaml
+++ b/js/src/jit/CacheIROps.yaml
@ -157,6 +157,13 @@
  args:
    input: ValId

+# Guard that the Value is not a GC thing.
+- name: GuardToNonGCThing
+  shared: true
+  transpile: false
+  cost_estimate: 1
+  args:
+    input: ValId
+
 # If the Value is a boolean, convert it to int32.
 - name: GuardBooleanToInt32
  shared: true
@ -2614,6 +2621,14 @@
    set: ObjId
    val: ValId

+# Set.prototype.has lookup for a key that was guarded with GuardToNonGCThing.
+- name: SetHasNonGCThingResult
+  shared: true
+  transpile: false
+  cost_estimate: 3
+  args:
+    set: ObjId
+    val: ValId
+
 - name: CallPrintString
  shared: true
  transpile: false
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@ -4605,6 +4605,212 @@ void MacroAssembler::iteratorClose(Register obj, Register temp1, Register temp2,
 #endif
 }

+// Write the hashable form of |value| into |result|. |value| must not be a GC
+// thing (asserted in DEBUG builds).
+//
+// Double inputs are unboxed into |tempFloat| and then either converted to an
+// int32 value, replaced by JS::NaNValue() when they are NaN, or copied
+// unchanged. All other inputs are copied unchanged.
+//
+// |result.scratchReg()| is used as the int32 conversion target.
+void MacroAssembler::toHashableNonGCThing(ValueOperand value,
+                                          ValueOperand result,
+                                          FloatRegister tempFloat) {
+  // Inline implementation of |HashableValue::setValue()|.
+
+#ifdef DEBUG
+  Label ok;
+  branchTestGCThing(Assembler::NotEqual, value, &ok);
+  assumeUnreachable("Unexpected GC thing");
+  bind(&ok);
+#endif
+
+  Label useInput, done;
+  // Only doubles need normalising; everything else is copied as-is.
+  branchTestDouble(Assembler::NotEqual, value, &useInput);
+  {
+    Register int32 = result.scratchReg();
+    unboxDouble(value, tempFloat);
+
+    // Normalize int32-valued doubles to int32 and negative zero to +0.
+    // NOTE(review): the trailing |false| presumably disables the negative-zero
+    // check so that -0 converts to int32 0 - confirm against the signature of
+    // convertDoubleToInt32().
+    Label canonicalize;
+    convertDoubleToInt32(tempFloat, int32, &canonicalize, false);
+    {
+      tagValue(JSVAL_TYPE_INT32, int32, result);
+      jump(&done);
+    }
+    bind(&canonicalize);
+    {
+      // Normalize the sign bit of a NaN.
+      // A double that compares ordered with itself is not NaN and is used
+      // unchanged.
+      branchDouble(Assembler::DoubleOrdered, tempFloat, tempFloat, &useInput);
+      moveValue(JS::NaNValue(), result);
+      jump(&done);
+    }
+  }
+
+  bind(&useInput);
+  moveValue(value, result);
+
+  bind(&done);
+}
+
+// Multiply |result| in place by mozilla::kGoldenRatioU32 using a 32-bit
+// multiply.
+void MacroAssembler::scrambleHashCode(Register result) {
+  // Inline implementation of |mozilla::ScrambleHashCode()|.
+
+  mul32(Imm32(mozilla::kGoldenRatioU32), result);
+}
+
+// Compute the 32-bit hash of |value| into |result|. |value| must not be a GC
+// thing (asserted in DEBUG builds).
+//
+// On JS_PUNBOX64, |temp| is clobbered with the upper 32 bits of the value. On
+// other platforms |temp| is not touched and only the payload register
+// contributes to the hash.
+void MacroAssembler::prepareHashNonGCThing(ValueOperand value, Register result,
+                                           Register temp) {
+  // Inline implementation of |OrderedHashTable::prepareHash()| and
+  // |mozilla::HashGeneric(v.asRawBits())|.
+
+#ifdef DEBUG
+  Label ok;
+  branchTestGCThing(Assembler::NotEqual, value, &ok);
+  assumeUnreachable("Unexpected GC thing");
+  bind(&ok);
+#endif
+
+  // uint32_t v1 = static_cast<uint32_t>(aValue);
+#ifdef JS_PUNBOX64
+  move64To32(value.toRegister64(), result);
+#else
+  move32(value.payloadReg(), result);
+#endif
+
+  // uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);
+#ifdef JS_PUNBOX64
+  auto r64 = Register64(temp);
+  move64(value.toRegister64(), r64);
+  rshift64(Imm32(32), r64);
+#else
+  // TODO: This seems like a bug in mozilla::detail::AddUintptrToHash().
+  // The uint64_t input is first converted to uintptr_t and then back to
+  // uint64_t. But |uint64_t(uintptr_t(bits))| actually only clears the high
+  // bits, so this computation:
+  //
+  // aValue = uintptr_t(bits)
+  // v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32)
+  //
+  // really just sets |v2 = 0|. And that means the xor-operation in AddU32ToHash
+  // can be optimized away, because |x ^ 0 = x|.
+  //
+  // Filed as bug 1718516.
+#endif
+
+  // mozilla::WrappingMultiply(kGoldenRatioU32, RotateLeft5(aHash) ^ aValue);
+  // with |aHash = 0| and |aValue = v1|.
+  // With |aHash = 0| the rotate and xor leave |v1| unchanged, so only the
+  // multiply is emitted.
+  mul32(Imm32(mozilla::kGoldenRatioU32), result);
+
+  // mozilla::WrappingMultiply(kGoldenRatioU32, RotateLeft5(aHash) ^ aValue);
+  // with |aHash = <above hash>| and |aValue = v2|.
+  rotateLeft(Imm32(5), result, result);
+#ifdef JS_PUNBOX64
+  xor32(temp, result);
+#endif
+
+  // Combine |mul32| and |scrambleHashCode| by directly multiplying with
+  // |kGoldenRatioU32 * kGoldenRatioU32|.
+  //
+  // mul32(Imm32(mozilla::kGoldenRatioU32), result);
+  //
+  // scrambleHashCode(result);
+  mul32(Imm32(mozilla::kGoldenRatioU32 * mozilla::kGoldenRatioU32), result);
+}
+
+// Emit a lookup of |value| in the hash table stored in |setOrMapObj|.
+//
+// |hash| is the already prepared hash of |value|; it is copied before use and
+// not modified here. On a match, control jumps to |found| with |entryTemp|
+// pointing at the matching entry. When nothing matches, control falls through
+// with |entryTemp| equal to zero.
+//
+// |temp1| and |temp2| are always clobbered. |temp3| and |temp4| are only used
+// by the BigInt comparison and must be InvalidReg for IsBigInt::No.
+//
+// |isBigInt| states what is statically known about |value|: No emits a raw
+// 64-bit comparison only, Yes and Maybe also emit a BigInt comparison.
+template <typename OrderedHashTable>
+void MacroAssembler::orderedHashTableLookup(Register setOrMapObj,
+                                            ValueOperand value, Register hash,
+                                            Register entryTemp, Register temp1,
+                                            Register temp2, Register temp3,
+                                            Register temp4, Label* found,
+                                            IsBigInt isBigInt) {
+  // Inline implementation of |OrderedHashTable::lookup()|.
+
+  MOZ_ASSERT_IF(isBigInt == IsBigInt::No, temp3 == InvalidReg);
+  MOZ_ASSERT_IF(isBigInt == IsBigInt::No, temp4 == InvalidReg);
+
+#ifdef DEBUG
+  // Verify at runtime that |value| agrees with |isBigInt|. Nothing is checked
+  // for IsBigInt::Maybe.
+  Label ok;
+  if (isBigInt == IsBigInt::No) {
+    branchTestBigInt(Assembler::NotEqual, value, &ok);
+    assumeUnreachable("Unexpected BigInt");
+  } else if (isBigInt == IsBigInt::Yes) {
+    branchTestBigInt(Assembler::Equal, value, &ok);
+    assumeUnreachable("Unexpected non-BigInt");
+  }
+  bind(&ok);
+#endif
+
+  // Load the |ValueSet|.
+  // NOTE(review): the slot offset is taken from SetObject even though this
+  // function is templated and the parameter is named |setOrMapObj|. Confirm
+  // that any other object type passed here stores its table in the same slot.
+  loadPrivate(Address(setOrMapObj, SetObject::getDataSlotOffset()), temp1);
+
+  // Load the bucket.
+  // The bucket index is |hash >> hashShift|; it selects a pointer-sized slot
+  // in the hash table array.
+  move32(hash, entryTemp);
+  load32(Address(temp1, OrderedHashTable::offsetOfImplHashShift()), temp2);
+  flexibleRshift32(temp2, entryTemp);
+
+  loadPtr(Address(temp1, OrderedHashTable::offsetOfImplHashTable()), temp2);
+  loadPtr(BaseIndex(temp2, entryTemp, ScalePointer), entryTemp);
+
+  // Search for a match in this bucket.
+  // The loop is entered at |start| so that an empty bucket is never
+  // dereferenced.
+  Label start, loop;
+  jump(&start);
+  bind(&loop);
+  {
+    // Inline implementation of |HashableValue::operator==|.
+
+    static_assert(OrderedHashTable::offsetOfImplDataElement() == 0,
+                  "offsetof(Data, element) is 0");
+    auto keyAddr = Address(entryTemp, OrderedHashTable::offsetOfEntryKey());
+
+    if (isBigInt == IsBigInt::No) {
+      // Two HashableValues are equal if they have equal bits.
+      branch64(Assembler::Equal, keyAddr, value.toRegister64(), found);
+    } else {
+      // The table pointer in |temp1| is no longer needed at this point, so
+      // |temp1| (and |temp2| on 32-bit) are reused to hold the entry's key.
+#ifdef JS_PUNBOX64
+      auto key = ValueOperand(temp1);
+#else
+      auto key = ValueOperand(temp1, temp2);
+#endif
+
+      loadValue(keyAddr, key);
+
+      // Two HashableValues are equal if they have equal bits.
+      branch64(Assembler::Equal, key.toRegister64(), value.toRegister64(),
+               found);
+
+      // BigInt values are considered equal if they represent the same
+      // mathematical value.
+      // A key that is not a BigInt cannot match; move on to the next entry.
+      Label next;
+      fallibleUnboxBigInt(key, temp2, &next);
+      if (isBigInt == IsBigInt::Yes) {
+        unboxBigInt(value, temp1);
+      } else {
+        fallibleUnboxBigInt(value, temp1, &next);
+      }
+      equalBigInts(temp1, temp2, temp3, temp4, temp1, temp2, &next, &next,
+                   &next);
+      jump(found);
+      bind(&next);
+    }
+  }
+  // Advance to the next entry in this bucket's chain.
+  loadPtr(Address(entryTemp, OrderedHashTable::offsetOfImplDataChain()),
+          entryTemp);
+  bind(&start);
+  // A zero entry pointer ends the chain; fall through as "not found".
+  branchTestPtr(Assembler::NonZero, entryTemp, entryTemp, &loop);
+}
+
+// Emit a lookup of |value| in |setObj| and set |result| to 1 when an entry was
+// found, 0 otherwise.
+//
+// |result| also serves as the entry temporary of the lookup. See
+// orderedHashTableLookup() for the requirements on |hash|, the temporaries
+// and |isBigInt|.
+void MacroAssembler::setObjectHas(Register setObj, ValueOperand value,
+                                  Register hash, Register result,
+                                  Register temp1, Register temp2,
+                                  Register temp3, Register temp4,
+                                  IsBigInt isBigInt) {
+  Label found;
+  orderedHashTableLookup<ValueSet>(setObj, value, hash, result, temp1, temp2,
+                                   temp3, temp4, &found, isBigInt);
+
+  // The lookup fell through: no matching entry.
+  Label done;
+  move32(Imm32(0), result);
+  jump(&done);
+
+  bind(&found);
+  move32(Imm32(1), result);
+  bind(&done);
+}
+
 // Can't push large frames blindly on windows, so we must touch frame memory
 // incrementally, with no more than 4096 - 1 bytes between touches.
 //
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@ -4667,6 +4667,41 @@ class MacroAssembler : public MacroAssemblerSpecific {
  void iteratorClose(Register obj, Register temp1, Register temp2,
                     Register temp3);

+  // Normalise the non-GC-thing |value| into |result| for use as a hash key.
+  // |tempFloat| is used to unbox double inputs.
+  void toHashableNonGCThing(ValueOperand value, ValueOperand result,
+                            FloatRegister tempFloat);
+
+ private:
+  // Inline version of mozilla::ScrambleHashCode(); updates |result| in place.
+  void scrambleHashCode(Register result);
+
+ public:
+  // Compute the 32-bit hash of the non-GC-thing |value| into |result|.
+  // |temp| is only used on JS_PUNBOX64 platforms.
+  void prepareHashNonGCThing(ValueOperand value, Register result,
+                             Register temp);
+
+ private:
+  // What is statically known about a looked-up value: it is never a BigInt
+  // (No), always a BigInt (Yes), or this has to be tested at runtime (Maybe).
+  enum class IsBigInt { No, Yes, Maybe };
+
+  /**
+   * Search for a value in an OrderedHashTable.
+   *
+   * |hash| is the prepared hash of |value|. |temp1| and |temp2| are always
+   * clobbered. |temp3| and |temp4| are only used for BigInt comparisons and
+   * must be InvalidReg when |isBigInt| is IsBigInt::No.
+   *
+   * When we jump to |found|, |entryTemp| holds the found hashtable entry.
+   * When no entry matches, control falls through.
+   */
+  template <typename OrderedHashTable>
+  void orderedHashTableLookup(Register setOrMapObj, ValueOperand value,
+                              Register hash, Register entryTemp, Register temp1,
+                              Register temp2, Register temp3, Register temp4,
+                              Label* found, IsBigInt isBigInt);
+
+  // Look up |value| in |setObj| and set |result| to 1 if it is present, 0
+  // otherwise. |temp3| and |temp4| must be InvalidReg for IsBigInt::No.
+  void setObjectHas(Register setObj, ValueOperand value, Register hash,
+                    Register result, Register temp1, Register temp2,
+                    Register temp3, Register temp4, IsBigInt isBigInt);
+
+ public:
+  // Set lookup for values known not to be BigInts: forwards to setObjectHas()
+  // with IsBigInt::No and without the two BigInt-only temporaries.
+  void setObjectHasNonBigInt(Register setObj, ValueOperand value, Register hash,
+                             Register result, Register temp1, Register temp2) {
+    setObjectHas(setObj, value, hash, result, temp1, temp2,
+                 /* temp3 = */ InvalidReg, /* temp4 = */ InvalidReg,
+                 IsBigInt::No);
+  }
+
  // Inline version of js_TypedArray_uint8_clamp_double.
  // This function clobbers the input register.
  void clampDoubleToUint8(FloatRegister input, Register output) PER_ARCH;