gecko-dev/xpcom/glue/PLDHashTable.h

622 lines
22 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef PLDHashTable_h
#define PLDHashTable_h
#include "mozilla/Atomics.h"
#include "mozilla/Attributes.h" // for MOZ_ALWAYS_INLINE
#include "mozilla/fallible.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/Move.h"
#include "mozilla/Types.h"
#include "nscore.h"
typedef uint32_t PLDHashNumber;
class PLDHashTable;
struct PLDHashTableOps;
// Table entry header structure.
//
// In order to allow in-line allocation of key and value, we do not declare
// either here. Instead, the API uses const void *key as a formal parameter.
// The key need not be stored in the entry; it may be part of the value, but
// need not be stored at all.
//
// Callback types are defined below and grouped into the PLDHashTableOps
// structure, for single static initialization per hash table sub-type.
//
// Each hash table sub-type should make its entry type a subclass of
// PLDHashEntryHdr. The mKeyHash member contains the result of multiplying the
// hash code returned from the hashKey callback (see below) by kGoldenRatio,
// then constraining the result to avoid the magic 0 and 1 values. The stored
// mKeyHash value is table size invariant, and it is maintained automatically
// -- users need never access it.
struct PLDHashEntryHdr
{
private:
friend class PLDHashTable;
PLDHashNumber mKeyHash;
};
#ifdef DEBUG
// This class does three kinds of checking:
//
// - that calls to one of |mOps| or to an enumerator do not cause re-entry into
// the table in an unsafe way;
//
// - that multiple threads do not access the table in an unsafe way;
//
// - that a table marked as immutable is not modified.
//
// "Safe" here means that multiple concurrent read operations are ok, but a
// write operation (i.e. one that can cause the entry storage to be reallocated
// or destroyed) cannot safely run concurrently with another read or write
// operation. This meaning of "safe" is only partial; for example, it does not
// cover whether a single entry in the table is modified by two separate
// threads. (Doing such checking would be much harder.)
//
// It does this with two variables:
//
// - mState, which embodies a tri-stage tagged union with the following
// variants:
// - Idle
// - Read(n), where 'n' is the number of concurrent read operations
// - Write
//
// - mIsWritable, which indicates if the table is mutable.
//
class Checker
{
public:
constexpr Checker() : mState(kIdle), mIsWritable(1) {}
Checker& operator=(Checker&& aOther) {
// Atomic<> doesn't have an |operator=(Atomic<>&&)|.
mState = uint32_t(aOther.mState);
mIsWritable = uint32_t(aOther.mIsWritable);
aOther.mState = kIdle;
return *this;
}
static bool IsIdle(uint32_t aState) { return aState == kIdle; }
static bool IsRead(uint32_t aState) { return kRead1 <= aState &&
aState <= kReadMax; }
static bool IsRead1(uint32_t aState) { return aState == kRead1; }
static bool IsWrite(uint32_t aState) { return aState == kWrite; }
bool IsIdle() const { return mState == kIdle; }
bool IsWritable() const { return !!mIsWritable; }
void SetNonWritable() { mIsWritable = 0; }
// NOTE: the obvious way to implement these functions is to (a) check
// |mState| is reasonable, and then (b) update |mState|. But the lack of
// atomicity in such an implementation can cause problems if we get unlucky
// thread interleaving between (a) and (b).
//
// So instead for |mState| we are careful to (a) first get |mState|'s old
// value and assign it a new value in single atomic operation, and only then
// (b) check the old value was reasonable. This ensures we don't have
// interleaving problems.
//
// For |mIsWritable| we don't need to be as careful because it can only in
// transition in one direction (from writable to non-writable).
void StartReadOp()
{
uint32_t oldState = mState++; // this is an atomic increment
MOZ_ASSERT(IsIdle(oldState) || IsRead(oldState));
MOZ_ASSERT(oldState < kReadMax); // check for overflow
}
void EndReadOp()
{
uint32_t oldState = mState--; // this is an atomic decrement
MOZ_ASSERT(IsRead(oldState));
}
void StartWriteOp()
{
MOZ_ASSERT(IsWritable());
uint32_t oldState = mState.exchange(kWrite);
MOZ_ASSERT(IsIdle(oldState));
}
void EndWriteOp()
{
// Check again that the table is writable, in case it was marked as
// non-writable just after the IsWritable() assertion in StartWriteOp()
// occurred.
MOZ_ASSERT(IsWritable());
uint32_t oldState = mState.exchange(kIdle);
MOZ_ASSERT(IsWrite(oldState));
}
void StartIteratorRemovalOp()
{
// When doing removals at the end of iteration, we go from Read1 state to
// Write and then back.
MOZ_ASSERT(IsWritable());
uint32_t oldState = mState.exchange(kWrite);
MOZ_ASSERT(IsRead1(oldState));
}
void EndIteratorRemovalOp()
{
// Check again that the table is writable, in case it was marked as
// non-writable just after the IsWritable() assertion in
// StartIteratorRemovalOp() occurred.
MOZ_ASSERT(IsWritable());
uint32_t oldState = mState.exchange(kRead1);
MOZ_ASSERT(IsWrite(oldState));
}
void StartDestructorOp()
{
// A destructor op is like a write, but the table doesn't need to be
// writable.
uint32_t oldState = mState.exchange(kWrite);
MOZ_ASSERT(IsIdle(oldState));
}
void EndDestructorOp()
{
uint32_t oldState = mState.exchange(kIdle);
MOZ_ASSERT(IsWrite(oldState));
}
private:
// Things of note about the representation of |mState|.
// - The values between kRead1..kReadMax represent valid Read(n) values.
// - kIdle and kRead1 are deliberately chosen so that incrementing the -
// former gives the latter.
// - 9999 concurrent readers should be enough for anybody.
static const uint32_t kIdle = 0;
static const uint32_t kRead1 = 1;
static const uint32_t kReadMax = 9999;
static const uint32_t kWrite = 10000;
mutable mozilla::Atomic<uint32_t> mState;
mutable mozilla::Atomic<uint32_t> mIsWritable;
};
#endif
// A PLDHashTable may be allocated on the stack or within another structure or
// class. No entry storage is allocated until the first element is added. This
// means that empty hash tables are cheap, which is good because they are
// common.
//
// There used to be a long, math-heavy comment here about the merits of
// double hashing vs. chaining; it was removed in bug 1058335. In short, double
// hashing is more space-efficient unless the element size gets large (in which
// case you should keep using double hashing but switch to using pointer
// elements). Also, with double hashing, you can't safely hold an entry pointer
// and use it after an add or remove operation, unless you sample Generation()
// before adding or removing, and compare the sample after, dereferencing the
// entry pointer only if Generation() has not changed.
class PLDHashTable
{
private:
// This class maintains the invariant that every time the entry store is
// changed, the generation is updated.
class EntryStore
{
private:
char* mEntryStore;
uint32_t mGeneration;
public:
EntryStore() : mEntryStore(nullptr), mGeneration(0) {}
~EntryStore()
{
free(mEntryStore);
mEntryStore = nullptr;
mGeneration++; // a little paranoid, but why not be extra safe?
}
char* Get() { return mEntryStore; }
const char* Get() const { return mEntryStore; }
void Set(char* aEntryStore)
{
mEntryStore = aEntryStore;
mGeneration++;
}
uint32_t Generation() const { return mGeneration; }
};
const PLDHashTableOps* const mOps; // Virtual operations; see below.
int16_t mHashShift; // Multiplicative hash shift.
const uint32_t mEntrySize; // Number of bytes in an entry.
uint32_t mEntryCount; // Number of entries in table.
uint32_t mRemovedCount; // Removed entry sentinels in table.
EntryStore mEntryStore; // (Lazy) entry storage and generation.
#ifdef DEBUG
mutable Checker mChecker;
#endif
public:
// Table capacity limit; do not exceed. The max capacity used to be 1<<23 but
// that occasionally that wasn't enough. Making it much bigger than 1<<26
// probably isn't worthwhile -- tables that big are kind of ridiculous.
// Also, the growth operation will (deliberately) fail if |capacity *
// mEntrySize| overflows a uint32_t, and mEntrySize is always at least 8
// bytes.
static const uint32_t kMaxCapacity = ((uint32_t)1 << 26);
static const uint32_t kMinCapacity = 8;
// Making this half of kMaxCapacity ensures it'll fit. Nobody should need an
// initial length anywhere nearly this large, anyway.
static const uint32_t kMaxInitialLength = kMaxCapacity / 2;
// This gives a default initial capacity of 8.
static const uint32_t kDefaultInitialLength = 4;
// Initialize the table with |aOps| and |aEntrySize|. The table's initial
// capacity is chosen such that |aLength| elements can be inserted without
// rehashing; if |aLength| is a power-of-two, this capacity will be
// |2*length|. However, because entry storage is allocated lazily, this
// initial capacity won't be relevant until the first element is added; prior
// to that the capacity will be zero.
//
// This will crash if |aEntrySize| and/or |aLength| are too large.
PLDHashTable(const PLDHashTableOps* aOps, uint32_t aEntrySize,
uint32_t aLength = kDefaultInitialLength);
PLDHashTable(PLDHashTable&& aOther)
// These two fields are |const|. Initialize them here because the
// move assignment operator cannot modify them.
: mOps(aOther.mOps)
, mEntrySize(aOther.mEntrySize)
// Initialize this field because it is required for a safe call to the
// destructor, which the move assignment operator does.
, mEntryStore()
#ifdef DEBUG
, mChecker()
#endif
{
*this = mozilla::Move(aOther);
}
PLDHashTable& operator=(PLDHashTable&& aOther);
~PLDHashTable();
// This should be used rarely.
const PLDHashTableOps* Ops() const { return mOps; }
// Size in entries (gross, not net of free and removed sentinels) for table.
// This can be zero if no elements have been added yet, in which case the
// entry storage will not have yet been allocated.
uint32_t Capacity() const
{
return mEntryStore.Get() ? CapacityFromHashShift() : 0;
}
uint32_t EntrySize() const { return mEntrySize; }
uint32_t EntryCount() const { return mEntryCount; }
uint32_t Generation() const { return mEntryStore.Generation(); }
// To search for a |key| in |table|, call:
//
// entry = table.Search(key);
//
// If |entry| is non-null, |key| was found. If |entry| is null, key was not
// found.
PLDHashEntryHdr* Search(const void* aKey);
// To add an entry identified by |key| to table, call:
//
// entry = table.Add(key, mozilla::fallible);
//
// If |entry| is null upon return, then the table is severely overloaded and
// memory can't be allocated for entry storage.
//
// Otherwise, |aEntry->mKeyHash| has been set so that
// PLDHashTable::EntryIsFree(entry) is false, and it is up to the caller to
// initialize the key and value parts of the entry sub-type, if they have not
// been set already (i.e. if entry was not already in the table, and if the
// optional initEntry hook was not used).
PLDHashEntryHdr* Add(const void* aKey, const mozilla::fallible_t&);
// This is like the other Add() function, but infallible, and so never
// returns null.
PLDHashEntryHdr* Add(const void* aKey);
// To remove an entry identified by |key| from table, call:
//
// table.Remove(key);
//
// If |key|'s entry is found, it is cleared (via table->mOps->clearEntry).
// The table's capacity may be reduced afterwards.
void Remove(const void* aKey);
// To remove an entry found by a prior search, call:
//
// table.RemoveEntry(entry);
//
// The entry, which must be present and in use, is cleared (via
// table->mOps->clearEntry). The table's capacity may be reduced afterwards.
void RemoveEntry(PLDHashEntryHdr* aEntry);
// Remove an entry already accessed via Search() or Add().
//
// NB: this is a "raw" or low-level method. It does not shrink the table if
// it is underloaded. Don't use it unless necessary and you know what you are
// doing, and if so, please explain in a comment why it is necessary instead
// of RemoveEntry().
void RawRemove(PLDHashEntryHdr* aEntry);
// This function is equivalent to
// ClearAndPrepareForLength(kDefaultInitialLength).
void Clear();
// This function clears the table's contents and frees its entry storage,
// leaving it in a empty state ready to be used again. Afterwards, when the
// first element is added the entry storage that gets allocated will have a
// capacity large enough to fit |aLength| elements without rehashing.
//
// It's conceptually the same as calling the destructor and then re-calling
// the constructor with the original |aOps| and |aEntrySize| arguments, and
// a new |aLength| argument.
void ClearAndPrepareForLength(uint32_t aLength);
// Measure the size of the table's entry storage. If the entries contain
// pointers to other heap blocks, you have to iterate over the table and
// measure those separately; hence the "Shallow" prefix.
size_t ShallowSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
// Like ShallowSizeOfExcludingThis(), but includes sizeof(*this).
size_t ShallowSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
#ifdef DEBUG
// Mark a table as immutable for the remainder of its lifetime. This
// changes the implementation from asserting one set of invariants to
// asserting a different set.
void MarkImmutable();
#endif
// If you use PLDHashEntryStub or a subclass of it as your entry struct, and
// if your entries move via memcpy and clear via memset(0), you can use these
// stub operations.
static const PLDHashTableOps* StubOps();
// The individual stub operations in StubOps().
static PLDHashNumber HashVoidPtrKeyStub(const void* aKey);
static bool MatchEntryStub(const PLDHashEntryHdr* aEntry, const void* aKey);
static void MoveEntryStub(PLDHashTable* aTable, const PLDHashEntryHdr* aFrom,
PLDHashEntryHdr* aTo);
static void ClearEntryStub(PLDHashTable* aTable, PLDHashEntryHdr* aEntry);
// Hash/match operations for tables holding C strings.
static PLDHashNumber HashStringKey(const void* aKey);
static bool MatchStringKey(const PLDHashEntryHdr* aEntry, const void* aKey);
// This is an iterator for PLDHashtable. Assertions will detect some, but not
// all, mid-iteration table modifications that might invalidate (e.g.
// reallocate) the entry storage.
//
// Any element can be removed during iteration using Remove(). If any
// elements are removed, the table may be resized once iteration ends.
//
// Example usage:
//
// for (auto iter = table.Iter(); !iter.Done(); iter.Next()) {
// auto entry = static_cast<FooEntry*>(iter.Get());
// // ... do stuff with |entry| ...
// // ... possibly call iter.Remove() once ...
// }
//
// or:
//
// for (PLDHashTable::Iterator iter(&table); !iter.Done(); iter.Next()) {
// auto entry = static_cast<FooEntry*>(iter.Get());
// // ... do stuff with |entry| ...
// // ... possibly call iter.Remove() once ...
// }
//
// The latter form is more verbose but is easier to work with when
// making subclasses of Iterator.
//
class Iterator
{
public:
explicit Iterator(PLDHashTable* aTable);
Iterator(Iterator&& aOther);
~Iterator();
// Have we finished?
bool Done() const { return mNexts == mNextsLimit; }
// Get the current entry.
PLDHashEntryHdr* Get() const
{
MOZ_ASSERT(!Done());
PLDHashEntryHdr* entry = reinterpret_cast<PLDHashEntryHdr*>(mCurrent);
MOZ_ASSERT(EntryIsLive(entry));
return entry;
}
// Advance to the next entry.
void Next();
// Remove the current entry. Must only be called once per entry, and Get()
// must not be called on that entry afterwards.
void Remove();
protected:
PLDHashTable* mTable; // Main table pointer.
private:
char* mStart; // The first entry.
char* mLimit; // One past the last entry.
char* mCurrent; // Pointer to the current entry.
uint32_t mNexts; // Number of Next() calls.
uint32_t mNextsLimit; // Next() call limit.
bool mHaveRemoved; // Have any elements been removed?
bool IsOnNonLiveEntry() const;
void MoveToNextEntry();
Iterator() = delete;
Iterator(const Iterator&) = delete;
Iterator& operator=(const Iterator&) = delete;
Iterator& operator=(const Iterator&&) = delete;
};
Iterator Iter() { return Iterator(this); }
// Use this if you need to initialize an Iterator in a const method. If you
// use this case, you should not call Remove() on the iterator.
Iterator ConstIter() const
{
return Iterator(const_cast<PLDHashTable*>(this));
}
private:
// Multiplicative hash uses an unsigned 32 bit integer and the golden ratio,
// expressed as a fixed-point 32-bit fraction.
static const uint32_t kHashBits = 32;
static const uint32_t kGoldenRatio = 0x9E3779B9U;
static uint32_t HashShift(uint32_t aEntrySize, uint32_t aLength);
static const PLDHashNumber kCollisionFlag = 1;
static bool EntryIsFree(PLDHashEntryHdr* aEntry)
{
return aEntry->mKeyHash == 0;
}
static bool EntryIsRemoved(PLDHashEntryHdr* aEntry)
{
return aEntry->mKeyHash == 1;
}
static bool EntryIsLive(PLDHashEntryHdr* aEntry)
{
return aEntry->mKeyHash >= 2;
}
static void MarkEntryFree(PLDHashEntryHdr* aEntry)
{
aEntry->mKeyHash = 0;
}
static void MarkEntryRemoved(PLDHashEntryHdr* aEntry)
{
aEntry->mKeyHash = 1;
}
PLDHashNumber Hash1(PLDHashNumber aHash0);
void Hash2(PLDHashNumber aHash, uint32_t& aHash2Out, uint32_t& aSizeMaskOut);
static bool MatchEntryKeyhash(PLDHashEntryHdr* aEntry, PLDHashNumber aHash);
PLDHashEntryHdr* AddressEntry(uint32_t aIndex);
// We store mHashShift rather than sizeLog2 to optimize the collision-free
// case in SearchTable.
uint32_t CapacityFromHashShift() const
{
return ((uint32_t)1 << (kHashBits - mHashShift));
}
PLDHashNumber ComputeKeyHash(const void* aKey);
enum SearchReason { ForSearchOrRemove, ForAdd };
template <SearchReason Reason>
PLDHashEntryHdr* NS_FASTCALL
SearchTable(const void* aKey, PLDHashNumber aKeyHash);
PLDHashEntryHdr* FindFreeEntry(PLDHashNumber aKeyHash);
bool ChangeTable(int aDeltaLog2);
void ShrinkIfAppropriate();
PLDHashTable(const PLDHashTable& aOther) = delete;
PLDHashTable& operator=(const PLDHashTable& aOther) = delete;
};
// Compute the hash code for a given key to be looked up, added, or removed.
// A hash code may have any PLDHashNumber value.
typedef PLDHashNumber (*PLDHashHashKey)(const void* aKey);
// Compare the key identifying aEntry with the provided key parameter. Return
// true if keys match, false otherwise.
typedef bool (*PLDHashMatchEntry)(const PLDHashEntryHdr* aEntry,
const void* aKey);
// Copy the data starting at aFrom to the new entry storage at aTo. Do not add
// reference counts for any strong references in the entry, however, as this
// is a "move" operation: the old entry storage at from will be freed without
// any reference-decrementing callback shortly.
typedef void (*PLDHashMoveEntry)(PLDHashTable* aTable,
const PLDHashEntryHdr* aFrom,
PLDHashEntryHdr* aTo);
// Clear the entry and drop any strong references it holds. This callback is
// invoked by Remove(), but only if the given key is found in the table.
typedef void (*PLDHashClearEntry)(PLDHashTable* aTable,
PLDHashEntryHdr* aEntry);
// Initialize a new entry, apart from mKeyHash. This function is called when
// Add() finds no existing entry for the given key, and must add a new one. At
// that point, |aEntry->mKeyHash| is not set yet, to avoid claiming the last
// free entry in a severely overloaded table.
typedef void (*PLDHashInitEntry)(PLDHashEntryHdr* aEntry, const void* aKey);
// Finally, the "vtable" structure for PLDHashTable. The first four hooks
// must be provided by implementations; they're called unconditionally by the
// generic PLDHashTable.cpp code. Hooks after these may be null.
//
// Summary of allocation-related hook usage with C++ placement new emphasis:
// initEntry Call placement new using default key-based ctor.
// moveEntry Call placement new using copy ctor, run dtor on old
// entry storage.
// clearEntry Run dtor on entry.
//
// Note the reason why initEntry is optional: the default hooks (stubs) clear
// entry storage: On successful Add(tbl, key), the returned entry pointer
// addresses an entry struct whose mKeyHash member has been set non-zero, but
// all other entry members are still clear (null). Add() callers can test such
// members to see whether the entry was newly created by the Add() call that
// just succeeded. If placement new or similar initialization is required,
// define an |initEntry| hook. Of course, the |clearEntry| hook must zero or
// null appropriately.
//
// XXX assumes 0 is null for pointer types.
struct PLDHashTableOps
{
// Mandatory hooks. All implementations must provide these.
PLDHashHashKey hashKey;
PLDHashMatchEntry matchEntry;
PLDHashMoveEntry moveEntry;
PLDHashClearEntry clearEntry;
// Optional hooks start here. If null, these are not called.
PLDHashInitEntry initEntry;
};
// A minimal entry is a subclass of PLDHashEntryHdr and has a void* key pointer.
struct PLDHashEntryStub : public PLDHashEntryHdr
{
const void* key;
};
#endif /* PLDHashTable_h */