mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-01 01:31:26 +00:00
78addb2c32
There are a couple of optimizations in `__hash_table::find` that are also applicable to other places, such as `__hash_table::__node_insert_unique_prepare` and `__hash_table::__emplace_unique_key_args`.
```
for (__nd = __nd->__next_;
     __nd != nullptr &&
     (__nd->__hash() == __hash
      // ^^^^^^^^^^^^^^^^^^^^^^
      //          (1)
      || std::__constrain_hash(__nd->__hash(), __bc) == __chash);
     __nd = __nd->__next_) {
  if ((__nd->__hash() == __hash)
      // ^^^^^^^^^^^^^^^^^^^^^^^^^^
      //          (2)
      && key_eq()(__nd->__upcast()->__value_, __k))
    return iterator(__nd, this);
}
```
(1): We can avoid the expensive modulo operation in `std::__constrain_hash` if the hashes match. This one is from commit 6a411472e3.
(2): We can avoid `key_eq` calls if the hashes don't match. Commit: 318d35a7bc
. Both of them are applicable for insert and emplace methods. Results of unordered_set_operations benchmark: ``` Comparing /tmp/main to /tmp/hashtable-hash-value-optimization Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------ BM_Hash/uint32_random_std_hash/1024 -0.0127 -0.0127 1511 1492 1508 1489 BM_Hash/uint32_random_custom_hash/1024 +0.0012 +0.0013 1370 1371 1367 1369 BM_Hash/uint32_top_std_hash/1024 -0.0027 -0.0028 1502 1497 1498 1494 BM_Hash/uint32_top_custom_hash/1024 +0.0033 +0.0032 1368 1373 1365 1370 BM_InsertValue/unordered_set_uint32/1024 +0.0267 +0.0266 36421 37392 36350 37318 BM_InsertValue/unordered_set_uint32_sorted/1024 +0.0230 +0.0229 28247 28897 28193 28837 BM_InsertValue/unordered_set_top_bits_uint32/1024 +0.0492 +0.0491 31012 32539 30952 32472 BM_InsertValueRehash/unordered_set_top_bits_uint32/1024 +0.0523 +0.0520 62905 66197 62780 66043 BM_InsertValue/unordered_set_string/1024 -0.0252 -0.0253 300762 293189 299805 292221 BM_InsertValueRehash/unordered_set_string/1024 -0.0932 -0.0920 332924 301882 331276 300810 BM_InsertValue/unordered_set_prefixed_string/1024 -0.0578 -0.0577 314239 296072 313222 295137 BM_InsertValueRehash/unordered_set_prefixed_string/1024 -0.0986 -0.0985 336091 302950 334982 301995 BM_Find/unordered_set_random_uint64/1024 -0.1416 -0.1417 16075 13798 16041 13769 BM_FindRehash/unordered_set_random_uint64/1024 -0.0105 -0.0105 5900 5838 5889 5827 BM_Find/unordered_set_sorted_uint64/1024 +0.0014 +0.0014 2813 2817 2807 2811 BM_FindRehash/unordered_set_sorted_uint64/1024 -0.0247 -0.0249 5863 5718 5851 5706 BM_Find/unordered_set_sorted_uint128/1024 +0.0113 +0.0112 15570 15746 15539 15713 BM_FindRehash/unordered_set_sorted_uint128/1024 +0.0438 +0.0441 6917 7220 6902 7206 BM_Find/unordered_set_sorted_uint32/1024 -0.0020 -0.0020 3098 3091 3092 3085 
BM_FindRehash/unordered_set_sorted_uint32/1024 +0.0570 +0.0569 5377 5684 5368 5673 BM_Find/unordered_set_sorted_large_uint64/1024 +0.0081 +0.0081 3594 3623 3587 3616 BM_FindRehash/unordered_set_sorted_large_uint64/1024 -0.0542 -0.0540 6154 5820 6140 5808 BM_Find/unordered_set_top_bits_uint64/1024 -0.0061 -0.0061 10440 10377 10417 10353 BM_FindRehash/unordered_set_top_bits_uint64/1024 +0.0131 +0.0128 5852 5928 5840 5914 BM_Find/unordered_set_string/1024 -0.0352 -0.0349 189037 182384 188389 181809 BM_FindRehash/unordered_set_string/1024 -0.0309 -0.0311 180718 175142 180141 174532 BM_Find/unordered_set_prefixed_string/1024 -0.0559 -0.0557 190853 180177 190251 179659 BM_FindRehash/unordered_set_prefixed_string/1024 -0.0563 -0.0561 182396 172136 181797 171602 BM_Rehash/unordered_set_string_arg/1024 -0.0244 -0.0241 27052 26393 26989 26339 BM_Rehash/unordered_set_int_arg/1024 -0.0410 -0.0410 19582 18779 19539 18738 BM_InsertDuplicate/unordered_set_int/1024 +0.0023 +0.0025 12168 12196 12142 12173 BM_InsertDuplicate/unordered_set_string/1024 -0.0505 -0.0504 189238 179683 188648 179133 BM_InsertDuplicate/unordered_set_prefixed_string/1024 -0.0989 -0.0987 198893 179222 198263 178702 BM_EmplaceDuplicate/unordered_set_int/1024 -0.0175 -0.0173 12674 12452 12646 12427 BM_EmplaceDuplicate/unordered_set_string/1024 -0.0559 -0.0557 190104 179481 189492 178934 BM_EmplaceDuplicate/unordered_set_prefixed_string/1024 -0.1111 -0.1110 201233 178870 200608 178341 BM_InsertDuplicate/unordered_set_int_insert_arg/1024 -0.0747 -0.0745 12993 12022 12964 11997 BM_InsertDuplicate/unordered_set_string_insert_arg/1024 -0.0584 -0.0583 191489 180311 190864 179731 BM_EmplaceDuplicate/unordered_set_int_insert_arg/1024 -0.0807 -0.0804 35946 33047 35866 32982 BM_EmplaceDuplicate/unordered_set_string_arg/1024 -0.0312 -0.0310 321623 311601 320559 310637 OVERALL_GEOMEAN -0.0276 -0.0275 0 0 0 0 ``` The time differences look more like noise to me.
But if we want to have these optimizations in `find`, we probably want them in `insert` and `emplace` as well. Reviewed By: #libc, Mordante Differential Revision: https://reviews.llvm.org/D140779
145 lines
4.0 KiB
C++
145 lines
4.0 KiB
C++
#ifndef BENCHMARK_GENERATE_INPUT_H
|
|
#define BENCHMARK_GENERATE_INPUT_H
|
|
|
|
#include <algorithm>
#include <climits>
#include <cstddef>
#include <limits>
#include <random>
#include <string>
#include <vector>
|
|
|
|
// Alphabet that random characters are drawn from: the ten decimal digits,
// then the upper-case letters, then the lower-case letters. The array is a
// plain list of chars, so it carries no terminating NUL.
static const char Letters[] = {
    // digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // upper-case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // lower-case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};

// Number of entries in Letters.
static const std::size_t LettersSize = sizeof(Letters) / sizeof(Letters[0]);
|
|
|
|
// Returns a reference to the single random engine shared by the generators in
// this header. The engine is a function-local static, so it is constructed on
// the first call and seeded once with a value obtained from
// std::random_device.
inline std::default_random_engine& getRandomEngine() {
  static std::default_random_engine Engine = [] {
    std::random_device Seeder;
    return std::default_random_engine(Seeder());
  }();
  return Engine;
}
|
|
|
|
inline char getRandomChar() {
|
|
std::uniform_int_distribution<> LettersDist(0, LettersSize - 1);
|
|
return Letters[LettersDist(getRandomEngine())];
|
|
}
|
|
|
|
template <class IntT>
|
|
inline IntT getRandomInteger(IntT Min, IntT Max) {
|
|
std::uniform_int_distribution<unsigned long long> dist(Min, Max);
|
|
return static_cast<IntT>(dist(getRandomEngine()));
|
|
}
|
|
|
|
// Builds a string of exactly Len characters, each produced by an independent
// call to getRandomChar().
inline std::string getRandomString(std::size_t Len) {
  std::string Result(Len, '\0');
  std::generate(Result.begin(), Result.end(), &getRandomChar);
  return Result;
}
|
|
|
|
// Returns N copies of the same value, namely -1 converted to IntT.
template <class IntT>
inline std::vector<IntT> getDuplicateIntegerInputs(size_t N) {
  const IntT Fill = static_cast<IntT>(-1);
  return std::vector<IntT>(N, Fill);
}
|
|
|
|
// Returns the ascending sequence 0, 1, ..., N - 1, each index converted to
// IntT.
template <class IntT>
inline std::vector<IntT> getSortedIntegerInputs(size_t N) {
  std::vector<IntT> inputs;
  // The final size is known up front, so allocate once instead of growing.
  inputs.reserve(N);
  for (size_t i = 0; i < N; ++i)
    inputs.push_back(static_cast<IntT>(i));
  return inputs;
}
|
|
|
|
// Returns the ascending sequence N, N + 1, ..., 2N - 1, each value converted
// to IntT (i.e. the sorted sequence shifted up by N).
template <class IntT>
std::vector<IntT> getSortedLargeIntegerInputs(size_t N) {
  std::vector<IntT> inputs;
  // The final size is known up front, so allocate once instead of growing.
  inputs.reserve(N);
  for (size_t i = 0; i < N; ++i)
    inputs.push_back(static_cast<IntT>(i + N));
  return inputs;
}
|
|
|
|
template <class IntT>
|
|
std::vector<IntT> getSortedTopBitsIntegerInputs(size_t N) {
|
|
std::vector<IntT> inputs = getSortedIntegerInputs<IntT>(N);
|
|
for (auto& E : inputs)
|
|
E <<= ((sizeof(IntT) / 2) * CHAR_BIT);
|
|
return inputs;
|
|
}
|
|
|
|
// Returns the descending sequence N - 1, N - 2, ..., 0, each index converted
// to IntT. An N of zero yields an empty vector.
template <class IntT>
inline std::vector<IntT> getReverseSortedIntegerInputs(size_t N) {
  std::vector<IntT> inputs;
  // The final size is known up front, so allocate once instead of growing.
  inputs.reserve(N);
  // Count down with an unsigned index: test first, then decrement, so the
  // body sees N - 1 down to 0 and the loop never wraps below zero.
  for (std::size_t i = N; i-- > 0;)
    inputs.push_back(static_cast<IntT>(i));
  return inputs;
}
|
|
|
|
// Returns N values that first rise and then fall ("pipe organ" shape):
// indices below N / 2 contribute their own value (0, 1, ...), and every
// remaining index i contributes N - i.
template <class IntT>
std::vector<IntT> getPipeOrganIntegerInputs(size_t N) {
  const size_t Half = N / 2;
  std::vector<IntT> Values;
  Values.reserve(N);

  size_t Idx = 0;
  // Rising half.
  for (; Idx != Half; ++Idx)
    Values.push_back(Idx);
  // Falling half, continuing from the same index.
  for (; Idx != N; ++Idx)
    Values.push_back(N - Idx);
  return Values;
}
|
|
|
|
template <class IntT>
|
|
std::vector<IntT> getRandomIntegerInputs(size_t N) {
|
|
std::vector<IntT> inputs;
|
|
for (size_t i = 0; i < N; ++i) {
|
|
inputs.push_back(getRandomInteger<IntT>(0, std::numeric_limits<IntT>::max()));
|
|
}
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<std::string> getDuplicateStringInputs(size_t N) {
|
|
std::vector<std::string> inputs(N, getRandomString(1024));
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<std::string> getRandomStringInputs(size_t N) {
|
|
std::vector<std::string> inputs;
|
|
for (size_t i = 0; i < N; ++i) {
|
|
inputs.push_back(getRandomString(1024));
|
|
}
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<std::string> getPrefixedRandomStringInputs(size_t N) {
|
|
std::vector<std::string> inputs;
|
|
constexpr int kSuffixLength = 32;
|
|
const std::string prefix = getRandomString(1024 - kSuffixLength);
|
|
for (size_t i = 0; i < N; ++i) {
|
|
inputs.push_back(prefix + getRandomString(kSuffixLength));
|
|
}
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<std::string> getSortedStringInputs(size_t N) {
|
|
std::vector<std::string> inputs = getRandomStringInputs(N);
|
|
std::sort(inputs.begin(), inputs.end());
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<std::string> getReverseSortedStringInputs(size_t N) {
|
|
std::vector<std::string> inputs = getSortedStringInputs(N);
|
|
std::reverse(inputs.begin(), inputs.end());
|
|
return inputs;
|
|
}
|
|
|
|
inline std::vector<const char*> getRandomCStringInputs(size_t N) {
|
|
static std::vector<std::string> inputs = getRandomStringInputs(N);
|
|
std::vector<const char*> cinputs;
|
|
for (auto const& str : inputs)
|
|
cinputs.push_back(str.c_str());
|
|
return cinputs;
|
|
}
|
|
|
|
#endif // BENCHMARK_GENERATE_INPUT_H
|