From 8c40d81d4f019b8b1ae02c154be657b949c2cf4d Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Mon, 29 Oct 2018 19:25:02 +0000 Subject: [PATCH] Bug 39129: Speeding up partition_point/lower_bound/upper_bound/ by using unsigned division by 2 when possible. Patch by Denis Yaroshevskiy (denis.yaroshevskij@gmail.com) The rationale and measurements can be found in the bug description: https://bugs.llvm.org/show_bug.cgi?id=39129 Reviewed as https://reviews.llvm.org/D52697 llvm-svn: 345525 --- libcxx/benchmarks/algorithms.bench.cpp | 64 +++++++++++++++++++ libcxx/include/algorithm | 34 ++++++++-- .../libcxx/algorithms/half_positive.pass.cpp | 59 +++++++++++++++++ 3 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 libcxx/test/libcxx/algorithms/half_positive.pass.cpp diff --git a/libcxx/benchmarks/algorithms.bench.cpp b/libcxx/benchmarks/algorithms.bench.cpp index 86315390e0d2..ab0e81b0cac9 100644 --- a/libcxx/benchmarks/algorithms.bench.cpp +++ b/libcxx/benchmarks/algorithms.bench.cpp @@ -58,5 +58,69 @@ BENCHMARK_CAPTURE(BM_Sort, sorted_descending_strings, BENCHMARK_CAPTURE(BM_Sort, single_element_strings, getDuplicateStringInputs)->Arg(TestNumInputs); +template +void do_binary_search_benchmark(benchmark::State& st, GenInputs gen, Alg alg) +{ + using ValueType = typename decltype(gen(0))::value_type; + auto in = gen(st.range(0)); + std::sort(in.begin(), in.end()); + + const auto every_10_percentile = [&]() -> std::vector { + size_t step = in.size() / 10; + + if (step == 0) { + st.SkipWithError("Input doesn't contain enough elements"); + return {}; + } + + std::vector res; + for (size_t i = 0; i < in.size(); i += step) + res.push_back(&in[i]); + + return res; + }(); + + for (auto _ : st) + { + for (auto* test : every_10_percentile) + benchmark::DoNotOptimize(alg(in.begin(), in.end(), *test)); + } +} + +template +void BM_LowerBound(benchmark::State& st, GenInputs gen) +{ + do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) { + return 
std::lower_bound(f, l, v); + }); +} + +BENCHMARK_CAPTURE(BM_LowerBound, random_int32, getRandomIntegerInputs) + ->Arg(TestNumInputs) // Small int32_t vector + ->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector + +BENCHMARK_CAPTURE(BM_LowerBound, random_int64, getRandomIntegerInputs) + ->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers. + +BENCHMARK_CAPTURE(BM_LowerBound, random_strings, getRandomStringInputs) + ->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap. + +template +void BM_EqualRange(benchmark::State& st, GenInputs gen) +{ + do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) { + return std::equal_range(f, l, v); + }); +} + +BENCHMARK_CAPTURE(BM_EqualRange, random_int32, getRandomIntegerInputs) + ->Arg(TestNumInputs) // Small int32_t vector + ->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector + +BENCHMARK_CAPTURE(BM_EqualRange, random_int64, getRandomIntegerInputs) + ->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers. + +BENCHMARK_CAPTURE(BM_EqualRange, random_strings, getRandomStringInputs) + ->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap. 
BENCHMARK_MAIN(); diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index beee6b5b8371..f119d252063b 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -750,6 +750,32 @@ public: bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);} }; +// Perform division by two quickly for positive integers (llvm.org/PR39129) + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +typename enable_if +< + is_integral<_Integral>::value, + _Integral +>::type +__half_positive(_Integral __value) +{ + return static_cast<_Integral>(static_cast::type>(__value) / 2); +} + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +typename enable_if +< + !is_integral<_Tp>::value, + _Tp +>::type +__half_positive(_Tp __value) +{ + return __value / 2; +} + #ifdef _LIBCPP_DEBUG template @@ -3202,7 +3228,7 @@ partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __ difference_type __len = _VSTD::distance(__first, __last); while (__len != 0) { - difference_type __l2 = __len / 2; + difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); if (__pred(*__m)) @@ -4069,7 +4095,7 @@ __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va difference_type __len = _VSTD::distance(__first, __last); while (__len != 0) { - difference_type __l2 = __len / 2; + difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); if (__comp(*__m, __value_)) @@ -4111,7 +4137,7 @@ __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va difference_type __len = _VSTD::distance(__first, __last); while (__len != 0) { - difference_type __l2 = __len / 2; + difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); if (__comp(__value_, *__m)) @@ -4153,7 +4179,7 @@ __equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va 
difference_type __len = _VSTD::distance(__first, __last); while (__len != 0) { - difference_type __l2 = __len / 2; + difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); if (__comp(*__m, __value_)) diff --git a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp new file mode 100644 index 000000000000..178055cbbd01 --- /dev/null +++ b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template <class _Tp> _Tp __half_positive(const _Tp&); + +// __half_positive divides an integer by 2 as an unsigned number +// when it is safe to do so. It can be an important optimization for lower_bound, +// for example. + +#include +#include +#include +#include + +#include "test_macros.h" +#include "user_defined_integral.hpp" + +namespace { + +template +TEST_CONSTEXPR bool test(IntType max_v = IntType(std::numeric_limits::max())) { + return std::__half_positive(max_v) == max_v / 2; +} + +} // namespace + +int main() +{ + { + assert(test()); + assert(test()); + assert(test()); + assert((test, int>())); + assert(test()); +#if !defined(_LIBCPP_HAS_NO_INT128) + assert(test<__int128_t>()); +#endif // !defined(_LIBCPP_HAS_NO_INT128) + } + +#if TEST_STD_VER >= 11 + { + static_assert(test(), ""); + static_assert(test(), ""); + static_assert(test(), ""); + static_assert(test(), ""); +#if !defined(_LIBCPP_HAS_NO_INT128) + static_assert(test<__int128_t>(), ""); +#endif // !defined(_LIBCPP_HAS_NO_INT128) + } +#endif // TEST_STD_VER >= 11 +}