/* ScummVM - Graphic Adventure Engine * * ScummVM is the legal property of its developers, whose names * are too numerous to list here. Please refer to the COPYRIGHT * file distributed with this source distribution. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ // The hash map (associative array) implementation in this file is // based on the PyDict implementation of CPython. #ifndef COMMON_HASHMAP_H #define COMMON_HASHMAP_H // Enable the following #define if you want to check how many collisions the // code produces (many collisions indicate either a bad hash function, or a // hash table that is too small). //#define DEBUG_HASH_COLLISIONS /** * Enable the following define to let HashMaps use a memory pool for the * nodes they contain. This increases memory usage, but can also improve * speed quite a bit. */ #define USE_HASHMAP_MEMORY_POOL #include "common/func.h" #include "common/str.h" #ifdef DEBUG_HASH_COLLISIONS #include "common/debug.h" #endif #ifdef USE_HASHMAP_MEMORY_POOL #include "common/memorypool.h" #endif namespace Common { /** * @defgroup common_hashmap Hash table (HashMap) * @ingroup common * * @brief API for operations on a hash table. * * @{ */ // The sgi IRIX MIPSpro Compiler has difficulties with nested templates. // This and the other __sgi conditionals below work around these problems. // The Intel C++ Compiler suffers from the same problems. #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER) template class IteratorImpl; #endif /** * HashMap maps objects of type Key to objects of type Val. * For each used Key type, a "size_type hashit(Key,size_type)" function * is required that computes a hash for the given Key object and returns it as * an integer from 0 to hashsize-1. An "equality functor" is also required * that returns true if its two arguments are to be considered * equal. Also, it is assumed that "=" works on Val objects for assignment. * * If aa is a HashMap, then space is allocated each time aa[key] is * referenced, for a new key. If the object is const, then an assertion is * triggered instead. Hence, if you are not sure whether a key is contained in * the map, use contains() first to check for its presence. */ template, class EqualFunc = EqualTo > class HashMap { public: typedef uint size_type; private: typedef HashMap HM_t; struct Node { Val _value; const Key _key; explicit Node(const Key &key) : _key(key), _value() {} Node() : _key(), _value() {} }; enum { HASHMAP_PERTURB_SHIFT = 5, HASHMAP_MIN_CAPACITY = 16, // The quotient of the next two constants controls how much the // internal storage of the hashmap may fill up before being // increased automatically. // Note: the quotient of these two must be between and different // from 0 and 1. HASHMAP_LOADFACTOR_NUMERATOR = 2, HASHMAP_LOADFACTOR_DENOMINATOR = 3, HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR }; #ifdef USE_HASHMAP_MEMORY_POOL ObjectPool _nodePool; #endif /** Default value, returned by the const getVal. */ Val _defaultVal; Node **_storage; ///< hashtable of size arrsize. size_type _mask; ///< Capacity of the HashMap minus one; must be a power of two of minus one size_type _size; size_type _deleted; ///< Number of deleted elements (_dummyNodes) HashFunc _hash; EqualFunc _equal; /** Dummy node, used as marker for erased objects. */ #define HASHMAP_DUMMY_NODE ((Node *)1) #ifdef DEBUG_HASH_COLLISIONS mutable int _collisions, _lookups, _dummyHits; #endif Node *allocNode(const Key &key) { #ifdef USE_HASHMAP_MEMORY_POOL return new (_nodePool) Node(key); #else return new Node(key); #endif } void freeNode(Node *node) { if (node && node != HASHMAP_DUMMY_NODE) #ifdef USE_HASHMAP_MEMORY_POOL _nodePool.deleteChunk(node); #else delete node; #endif } void assign(const HM_t &map); size_type lookup(const Key &key) const; size_type lookupAndCreateIfMissing(const Key &key); void expandStorage(size_type newCapacity); #if !defined(__sgi) || defined(__GNUC__) template friend class IteratorImpl; #endif /** * Simple HashMap iterator implementation. */ template class IteratorImpl { friend class HashMap; #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER) template friend class Common::IteratorImpl; #else template friend class IteratorImpl; #endif protected: typedef const HashMap hashmap_t; size_type _idx; hashmap_t *_hashmap; protected: IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {} NodeType *deref() const { assert(_hashmap != nullptr); assert(_idx <= _hashmap->_mask); Node *node = _hashmap->_storage[_idx]; assert(node != nullptr); assert(node != HASHMAP_DUMMY_NODE); return node; } public: IteratorImpl() : _idx(0), _hashmap(nullptr) {} template IteratorImpl(const IteratorImpl &c) : _idx(c._idx), _hashmap(c._hashmap) {} NodeType &operator*() const { return *deref(); } NodeType *operator->() const { return deref(); } bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; } bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); } IteratorImpl &operator++() { assert(_hashmap); do { _idx++; } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE)); if (_idx > _hashmap->_mask) _idx = (size_type)-1; return *this; } IteratorImpl operator++(int) { IteratorImpl old = *this; operator ++(); return old; } }; public: typedef IteratorImpl iterator; typedef IteratorImpl const_iterator; HashMap(); HashMap(const HM_t &map); ~HashMap(); HM_t &operator=(const HM_t &map) { if (this == &map) return *this; // Remove the previous content and ... clear(); delete[] _storage; // ... copy the new stuff. assign(map); return *this; } bool contains(const Key &key) const; Val &operator[](const Key &key); const Val &operator[](const Key &key) const; Val &getOrCreateVal(const Key &key); Val &getVal(const Key &key); const Val &getVal(const Key &key) const; const Val &getValOrDefault(const Key &key) const; const Val &getValOrDefault(const Key &key, const Val &defaultVal) const; bool tryGetVal(const Key &key, Val &out) const; void setVal(const Key &key, const Val &val); void clear(bool shrinkArray = 0); void erase(iterator entry); void erase(const Key &key); size_type size() const { return _size; } iterator begin() { // Find and return the first non-empty entry for (size_type ctr = 0; ctr <= _mask; ++ctr) { if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE) return iterator(ctr, this); } return end(); } iterator end() { return iterator((size_type)-1, this); } const_iterator begin() const { // Find and return the first non-empty entry for (size_type ctr = 0; ctr <= _mask; ++ctr) { if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE) return const_iterator(ctr, this); } return end(); } const_iterator end() const { return const_iterator((size_type)-1, this); } iterator find(const Key &key) { size_type ctr = lookup(key); if (_storage[ctr]) return iterator(ctr, this); return end(); } const_iterator find(const Key &key) const { size_type ctr = lookup(key); if (_storage[ctr]) return const_iterator(ctr, this); return end(); } // TODO: insert() method? /** Return true if hashmap is empty. */ bool empty() const { return (_size == 0); } }; template void NORETURN_PRE unknownKeyError(Key k) NORETURN_POST { error("Unknown key"); } template<> void NORETURN_PRE unknownKeyError(::Common::String key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(signed char key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(unsigned char key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(short signed key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(short unsigned key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(long signed key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(long unsigned key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(signed int key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(unsigned int key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(long long signed key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(long long unsigned key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(void *key) NORETURN_POST; template<> void NORETURN_PRE unknownKeyError(const char *key) NORETURN_POST; //------------------------------------------------------- // HashMap functions /** * Base constructor, creates an empty hashmap. */ template HashMap::HashMap() : _defaultVal() { _mask = HASHMAP_MIN_CAPACITY - 1; _storage = new Node *[HASHMAP_MIN_CAPACITY]; assert(_storage != nullptr); memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); _size = 0; _deleted = 0; #ifdef DEBUG_HASH_COLLISIONS _collisions = 0; _lookups = 0; _dummyHits = 0; #endif } /** * Copy constructor, creates a full copy of the given hashmap. * A custom copy constructor must be provided as pointers * to heap buffers are used for the internal storage. */ template HashMap::HashMap(const HM_t &map) : _defaultVal() { #ifdef DEBUG_HASH_COLLISIONS _collisions = 0; _lookups = 0; _dummyHits = 0; #endif assign(map); } /** * Destructor, frees all used memory. */ template HashMap::~HashMap() { for (size_type ctr = 0; ctr <= _mask; ++ctr) freeNode(_storage[ctr]); delete[] _storage; #ifdef DEBUG_HASH_COLLISIONS extern void updateHashCollisionStats(int, int, int, int, int); updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size); #endif } /** * Internal method for assigning the content of another HashMap * to this one. * * @note The previous storage here is *not* deallocated here -- the caller is * responsible for doing that! */ template void HashMap::assign(const HM_t &map) { _mask = map._mask; _storage = new Node *[_mask + 1]; assert(_storage != nullptr); memset(_storage, 0, (_mask + 1) * sizeof(Node *)); // Simply clone the map given to us, one by one. _size = 0; _deleted = 0; for (size_type ctr = 0; ctr <= _mask; ++ctr) { if (map._storage[ctr] == HASHMAP_DUMMY_NODE) { _storage[ctr] = HASHMAP_DUMMY_NODE; _deleted++; } else if (map._storage[ctr] != nullptr) { _storage[ctr] = allocNode(map._storage[ctr]->_key); _storage[ctr]->_value = map._storage[ctr]->_value; _size++; } } // Perform a sanity check (to help track down hashmap corruption) assert(_size == map._size); assert(_deleted == map._deleted); } /** * Clear all values in the hashmap. */ template void HashMap::clear(bool shrinkArray) { for (size_type ctr = 0; ctr <= _mask; ++ctr) { freeNode(_storage[ctr]); _storage[ctr] = nullptr; } #ifdef USE_HASHMAP_MEMORY_POOL _nodePool.freeUnusedPages(); #endif if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) { delete[] _storage; _mask = HASHMAP_MIN_CAPACITY - 1; _storage = new Node *[HASHMAP_MIN_CAPACITY]; assert(_storage != nullptr); memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); } _size = 0; _deleted = 0; } template void HashMap::expandStorage(size_type newCapacity) { assert(newCapacity > _mask + 1); #ifndef NDEBUG const size_type old_size = _size; #endif const size_type old_mask = _mask; Node **old_storage = _storage; // allocate a new array _size = 0; _deleted = 0; _mask = newCapacity - 1; _storage = new Node *[newCapacity]; assert(_storage != nullptr); memset(_storage, 0, newCapacity * sizeof(Node *)); // rehash all the old elements for (size_type ctr = 0; ctr <= old_mask; ++ctr) { if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE) continue; // Insert the element from the old table into the new table. // Since we know that no key exists twice in the old table, we // can do this slightly better than by calling lookup, since we // don't have to call _equal(). const size_type hash = _hash(old_storage[ctr]->_key); size_type idx = hash & _mask; for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) { idx = (5 * idx + perturb + 1) & _mask; } _storage[idx] = old_storage[ctr]; _size++; } // Perform a sanity check: Old number of elements should match the new one! // This check will fail if some previous operation corrupted this hashmap. assert(_size == old_size); delete[] old_storage; return; } template typename HashMap::size_type HashMap::lookup(const Key &key) const { const size_type hash = _hash(key); size_type ctr = hash & _mask; for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { if (_storage[ctr] == nullptr) break; if (_storage[ctr] == HASHMAP_DUMMY_NODE) { #ifdef DEBUG_HASH_COLLISIONS _dummyHits++; #endif } else if (_equal(_storage[ctr]->_key, key)) break; ctr = (5 * ctr + perturb + 1) & _mask; #ifdef DEBUG_HASH_COLLISIONS _collisions++; #endif } #ifdef DEBUG_HASH_COLLISIONS _lookups++; debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d", _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups), (const void *)this, _mask + 1, _size); #endif return ctr; } template typename HashMap::size_type HashMap::lookupAndCreateIfMissing(const Key &key) { const size_type hash = _hash(key); size_type ctr = hash & _mask; const size_type NONE_FOUND = _mask + 1; size_type first_free = NONE_FOUND; bool found = false; for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { if (_storage[ctr] == nullptr) break; if (_storage[ctr] == HASHMAP_DUMMY_NODE) { #ifdef DEBUG_HASH_COLLISIONS _dummyHits++; #endif if (first_free == NONE_FOUND) first_free = ctr; } else if (_equal(_storage[ctr]->_key, key)) { found = true; break; } ctr = (5 * ctr + perturb + 1) & _mask; #ifdef DEBUG_HASH_COLLISIONS _collisions++; #endif } #ifdef DEBUG_HASH_COLLISIONS _lookups++; debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d", _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups), (const void *)this, _mask + 1, _size); #endif if (!found && first_free != NONE_FOUND) ctr = first_free; if (!found) { if (_storage[ctr]) _deleted--; _storage[ctr] = allocNode(key); assert(_storage[ctr] != nullptr); _size++; // Keep the load factor below a certain threshold. // Deleted nodes are also counted size_type capacity = _mask + 1; if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) { capacity = capacity < 500 ? (capacity * 4) : (capacity * 2); expandStorage(capacity); ctr = lookup(key); assert(_storage[ctr] != nullptr); } } return ctr; } /** * Check whether the hashmap contains the given key. */ template bool HashMap::contains(const Key &key) const { size_type ctr = lookup(key); return (_storage[ctr] != nullptr); } /** * Get a value from the hashmap. */ template Val &HashMap::operator[](const Key &key) { return getOrCreateVal(key); } /** * @overload */ template const Val &HashMap::operator[](const Key &key) const { return getVal(key); } /** * Get a value from the hashmap. */ template Val &HashMap::getOrCreateVal(const Key &key) { size_type ctr = lookupAndCreateIfMissing(key); assert(_storage[ctr] != nullptr); return _storage[ctr]->_value; } /** * @overload */ template Val &HashMap::getVal(const Key &key) { size_type ctr = lookup(key); if (_storage[ctr] != nullptr) return _storage[ctr]->_value; else // In the past getVal() and operator[] used to return the default value for this case. // Clarifying the intent by using getValOrDefault() when we query a key that may not be // present is a good idea, but we have a lot of legacy code that may need to be updated. // So for now only returns an error in non-release builds. Once we are confident all the // code has been updated to use the correct function we can remove the RELEASE_BUILD // special case. #ifdef RELEASE_BUILD return _defaultVal; #else unknownKeyError(key); #endif } template const Val &HashMap::getVal(const Key &key) const { size_type ctr = lookup(key); if (_storage[ctr] != nullptr) return _storage[ctr]->_value; else // See comment in non-const getVal() above. #ifdef RELEASE_BUILD return _defaultVal; #else unknownKeyError(key); #endif } template const Val &HashMap::getValOrDefault(const Key &key) const { return getValOrDefault(key, _defaultVal); } /** * Get a value from the hashmap. If the key is not present, then return @p defaultVal. */ template const Val &HashMap::getValOrDefault(const Key &key, const Val &defaultVal) const { size_type ctr = lookup(key); if (_storage[ctr] != nullptr) return _storage[ctr]->_value; else return defaultVal; } /** * Assign an element specified by @p key to a value @p val. */ template bool HashMap::tryGetVal(const Key &key, Val &out) const { size_type ctr = lookup(key); if (_storage[ctr] != nullptr) { out = _storage[ctr]->_value; return true; } else { return false; } } template void HashMap::setVal(const Key &key, const Val &val) { size_type ctr = lookupAndCreateIfMissing(key); assert(_storage[ctr] != nullptr); _storage[ctr]->_value = val; } /** * Erase an element referred to by an iterator. */ template void HashMap::erase(iterator entry) { // Check whether we have a valid iterator assert(entry._hashmap == this); const size_type ctr = entry._idx; assert(ctr <= _mask); Node * const node = _storage[ctr]; assert(node != NULL); assert(node != HASHMAP_DUMMY_NODE); // If we remove a key, we replace it with a dummy node. freeNode(node); _storage[ctr] = HASHMAP_DUMMY_NODE; _size--; _deleted++; } /** * Erase an element specified by a key. */ template void HashMap::erase(const Key &key) { size_type ctr = lookup(key); if (_storage[ctr] == nullptr) return; // If we remove a key, we replace it with a dummy node. freeNode(_storage[ctr]); _storage[ctr] = HASHMAP_DUMMY_NODE; _size--; _deleted++; return; } #undef HASHMAP_DUMMY_NODE /** @} */ } // End of namespace Common #endif