Nikolas Klauser f7407411a1
[libc++] Optimize std::find for segmented iterators (#67224)
```
--------------------------------------------------------------------------
Benchmark                                              old             new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1                        6.06 ns         10.6 ns
bm_find<std::deque<char>>/2                        15.5 ns         10.6 ns
bm_find<std::deque<char>>/3                        19.0 ns         10.6 ns
bm_find<std::deque<char>>/4                        20.8 ns         10.6 ns
bm_find<std::deque<char>>/5                        22.0 ns         10.6 ns
bm_find<std::deque<char>>/6                        23.0 ns         10.5 ns
bm_find<std::deque<char>>/7                        24.8 ns         10.7 ns
bm_find<std::deque<char>>/8                        25.7 ns         10.6 ns
bm_find<std::deque<char>>/16                       28.3 ns         10.6 ns
bm_find<std::deque<char>>/64                       44.2 ns         27.0 ns
bm_find<std::deque<char>>/512                       133 ns         37.6 ns
bm_find<std::deque<char>>/4096                      867 ns         53.1 ns
bm_find<std::deque<char>>/32768                    6838 ns          160 ns
bm_find<std::deque<char>>/262144                  52897 ns         1495 ns
bm_find<std::deque<char>>/1048576                215621 ns         6077 ns
bm_find<std::deque<short>>/1                       6.03 ns         6.28 ns
bm_find<std::deque<short>>/2                       15.8 ns         15.8 ns
bm_find<std::deque<short>>/3                       20.5 ns         20.3 ns
bm_find<std::deque<short>>/4                       21.0 ns         21.0 ns
bm_find<std::deque<short>>/5                       23.0 ns         22.1 ns
bm_find<std::deque<short>>/6                       22.6 ns         23.0 ns
bm_find<std::deque<short>>/7                       23.4 ns         23.7 ns
bm_find<std::deque<short>>/8                       24.4 ns         24.9 ns
bm_find<std::deque<short>>/16                      26.6 ns         27.2 ns
bm_find<std::deque<short>>/64                      43.2 ns         40.9 ns
bm_find<std::deque<short>>/512                      124 ns         90.7 ns
bm_find<std::deque<short>>/4096                     845 ns          525 ns
bm_find<std::deque<short>>/32768                   7273 ns         3194 ns
bm_find<std::deque<short>>/262144                 53710 ns        24385 ns
bm_find<std::deque<short>>/1048576               216086 ns        96195 ns
bm_find<std::deque<int>>/1                         6.03 ns         10.3 ns
bm_find<std::deque<int>>/2                         15.6 ns         10.3 ns
bm_find<std::deque<int>>/3                         19.1 ns         10.3 ns
bm_find<std::deque<int>>/4                         22.3 ns         10.3 ns
bm_find<std::deque<int>>/5                         23.5 ns         10.4 ns
bm_find<std::deque<int>>/6                         23.1 ns         10.3 ns
bm_find<std::deque<int>>/7                         23.7 ns         10.2 ns
bm_find<std::deque<int>>/8                         24.5 ns         10.2 ns
bm_find<std::deque<int>>/16                        27.9 ns         26.6 ns
bm_find<std::deque<int>>/64                        42.6 ns         32.2 ns
bm_find<std::deque<int>>/512                        123 ns         43.0 ns
bm_find<std::deque<int>>/4096                       874 ns         93.5 ns
bm_find<std::deque<int>>/32768                     7031 ns          751 ns
bm_find<std::deque<int>>/262144                   57723 ns         6169 ns
bm_find<std::deque<int>>/1048576                 230867 ns        35851 ns
bm_ranges_find<std::deque<char>>/1                 5.97 ns         10.6 ns
bm_ranges_find<std::deque<char>>/2                 16.0 ns         10.5 ns
bm_ranges_find<std::deque<char>>/3                 19.5 ns         10.5 ns
bm_ranges_find<std::deque<char>>/4                 21.1 ns         10.6 ns
bm_ranges_find<std::deque<char>>/5                 22.8 ns         10.5 ns
bm_ranges_find<std::deque<char>>/6                 22.8 ns         10.6 ns
bm_ranges_find<std::deque<char>>/7                 23.4 ns         10.8 ns
bm_ranges_find<std::deque<char>>/8                 24.1 ns         10.5 ns
bm_ranges_find<std::deque<char>>/16                26.9 ns         10.6 ns
bm_ranges_find<std::deque<char>>/64                50.2 ns         27.2 ns
bm_ranges_find<std::deque<char>>/512                126 ns         38.3 ns
bm_ranges_find<std::deque<char>>/4096               868 ns         53.8 ns
bm_ranges_find<std::deque<char>>/32768             6695 ns          161 ns
bm_ranges_find<std::deque<char>>/262144           54411 ns         1497 ns
bm_ranges_find<std::deque<char>>/1048576         241699 ns         6042 ns
bm_ranges_find<std::deque<short>>/1                6.39 ns         6.31 ns
bm_ranges_find<std::deque<short>>/2                15.8 ns         15.9 ns
bm_ranges_find<std::deque<short>>/3                19.0 ns         19.8 ns
bm_ranges_find<std::deque<short>>/4                20.8 ns         20.9 ns
bm_ranges_find<std::deque<short>>/5                21.8 ns         22.1 ns
bm_ranges_find<std::deque<short>>/6                23.0 ns         23.0 ns
bm_ranges_find<std::deque<short>>/7                23.2 ns         23.9 ns
bm_ranges_find<std::deque<short>>/8                23.7 ns         24.4 ns
bm_ranges_find<std::deque<short>>/16               26.6 ns         26.8 ns
bm_ranges_find<std::deque<short>>/64               43.4 ns         39.7 ns
bm_ranges_find<std::deque<short>>/512               131 ns         90.5 ns
bm_ranges_find<std::deque<short>>/4096              851 ns          523 ns
bm_ranges_find<std::deque<short>>/32768            7370 ns         3166 ns
bm_ranges_find<std::deque<short>>/262144          60778 ns        24814 ns
bm_ranges_find<std::deque<short>>/1048576        229288 ns        99273 ns
bm_ranges_find<std::deque<int>>/1                  6.43 ns         10.2 ns
bm_ranges_find<std::deque<int>>/2                  16.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/3                  19.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/4                  21.0 ns         10.2 ns
bm_ranges_find<std::deque<int>>/5                  21.9 ns         10.4 ns
bm_ranges_find<std::deque<int>>/6                  22.7 ns         10.2 ns
bm_ranges_find<std::deque<int>>/7                  23.9 ns         10.2 ns
bm_ranges_find<std::deque<int>>/8                  23.8 ns         10.2 ns
bm_ranges_find<std::deque<int>>/16                 27.2 ns         27.1 ns
bm_ranges_find<std::deque<int>>/64                 42.4 ns         32.4 ns
bm_ranges_find<std::deque<int>>/512                 122 ns         43.0 ns
bm_ranges_find<std::deque<int>>/4096                895 ns         93.7 ns
bm_ranges_find<std::deque<int>>/32768              6890 ns          756 ns
bm_ranges_find<std::deque<int>>/262144            54025 ns         6102 ns
bm_ranges_find<std::deque<int>>/1048576          221558 ns        32783 ns
```
2023-12-15 17:10:16 +01:00

165 lines
6.1 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_FIND_H
#define _LIBCPP___ALGORITHM_FIND_H
#include <__algorithm/find_segment_if.h>
#include <__algorithm/min.h>
#include <__algorithm/unwrap_iter.h>
#include <__bit/countr.h>
#include <__bit/invert_if.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
#include <__fwd/bit_reference.h>
#include <__iterator/segmented_iterator.h>
#include <__string/constexpr_c_functions.h>
#include <__type_traits/is_same.h>
#include <__utility/move.h>
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
# include <cwchar>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
// generic implementation
template <class _Iter, class _Sent, class _Tp, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
__find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
for (; __first != __last; ++__first)
if (std::__invoke(__proj, *__first) == __value)
break;
return __first;
}
// trivially equality comparable implementations
template <class _Tp,
class _Up,
class _Proj,
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
sizeof(_Tp) == 1,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first))
return __ret;
return __last;
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _Tp,
class _Up,
class _Proj,
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t),
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first))
return __ret;
return __last;
}
#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// __bit_iterator implementation
template <bool _ToFind, class _Cp, bool _IsConst>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst>
__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
using _It = __bit_iterator<_Cp, _IsConst>;
using __storage_type = typename _It::__storage_type;
const int __bits_per_word = _It::__bits_per_word;
// do first partial word
if (__first.__ctz_ != 0) {
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
__storage_type __dn = std::min(__clz_f, __n);
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
if (__b)
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
if (__n == __dn)
return __first + __n;
__n -= __dn;
++__first.__seg_;
}
// do middle whole words
for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
if (__b)
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
}
// do last partial word
if (__n > 0) {
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
if (__b)
return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
}
return _It(__first.__seg_, static_cast<unsigned>(__n));
}
template <class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
__find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
if (static_cast<bool>(__value))
return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
}
// segmented iterator implementation
template <class>
struct __find_segment;
template <class _SegmentedIterator,
class _Tp,
class _Proj,
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
}
template <class _Tp>
struct __find_segment {
const _Tp& __value_;
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __find_segment(const _Tp& __value) : __value_(__value) {}
template <class _InputIterator, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
return std::__find_impl(__first, __last, __value_, __proj);
}
};
// public API
template <class _InputIterator, class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__rewrap_iter(
__first, std::__find_impl(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value, __proj));
}
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___ALGORITHM_FIND_H