improve performance of string::find

string::find used to call the generic algorithm ::find.  The patch special
case string::find such that it ultimately gets converted to calls to memchr
and memcmp.

The patch improves the performance of the string::find routine by about 20x.

Without the patch, the performance on an x86_64-linux 3400 MHz machine is:

Benchmark                           Time           CPU Iterations
-----------------------------------------------------------------
BM_StringFindNoMatch/10             4 ns          4 ns  166421326
BM_StringFindNoMatch/64            37 ns         37 ns   18754392
BM_StringFindNoMatch/512          268 ns        268 ns    2586060
BM_StringFindNoMatch/4k          2143 ns       2144 ns     328342
BM_StringFindNoMatch/32k        16910 ns      16917 ns      40623
BM_StringFindNoMatch/128k       67577 ns      67602 ns      10138
BM_StringFindAllMatch/1             3 ns          3 ns  265163471
BM_StringFindAllMatch/8             6 ns          6 ns  112582467
BM_StringFindAllMatch/64           36 ns         36 ns   19566457
BM_StringFindAllMatch/512         209 ns        209 ns    3318893
BM_StringFindAllMatch/4k         1618 ns       1618 ns     432963
BM_StringFindAllMatch/32k       12909 ns      12914 ns      54317
BM_StringFindAllMatch/128k      48342 ns      48361 ns      13922
BM_StringFindMatch1/1           33777 ns      33790 ns      20698
BM_StringFindMatch1/8           33940 ns      33953 ns      20619
BM_StringFindMatch1/64          34038 ns      34051 ns      20571
BM_StringFindMatch1/512         34217 ns      34230 ns      20480
BM_StringFindMatch1/4k          35510 ns      35524 ns      19752
BM_StringFindMatch1/32k         46438 ns      46456 ns      15030
BM_StringFindMatch2/1           33839 ns      33852 ns      20648
BM_StringFindMatch2/8           33950 ns      33963 ns      20594
BM_StringFindMatch2/64          33846 ns      33859 ns      20668
BM_StringFindMatch2/512         34023 ns      34036 ns      20279
BM_StringFindMatch2/4k          35422 ns      35436 ns      19716
BM_StringFindMatch2/32k         46570 ns      46588 ns      15027

With the patch applied

Benchmark                           Time           CPU Iterations
-----------------------------------------------------------------
BM_StringFindNoMatch/10             5 ns          5 ns  133724346
BM_StringFindNoMatch/64             6 ns          6 ns  119312184
BM_StringFindNoMatch/512           13 ns         13 ns   51539628
BM_StringFindNoMatch/4k            77 ns         77 ns    8935934
BM_StringFindNoMatch/32k          551 ns        551 ns    1222808
BM_StringFindNoMatch/128k        2684 ns       2685 ns     259957
BM_StringFindAllMatch/1             7 ns          7 ns   98017959
BM_StringFindAllMatch/8             7 ns          7 ns   91466911
BM_StringFindAllMatch/64            8 ns          8 ns   85707392
BM_StringFindAllMatch/512          20 ns         20 ns   34490895
BM_StringFindAllMatch/4k           93 ns         93 ns    7360375
BM_StringFindAllMatch/32k         827 ns        828 ns     829944
BM_StringFindAllMatch/128k       3593 ns       3594 ns     195815
BM_StringFindMatch1/1            1332 ns       1332 ns     516354
BM_StringFindMatch1/8            1336 ns       1336 ns     495876
BM_StringFindMatch1/64           1338 ns       1339 ns     516656
BM_StringFindMatch1/512          1357 ns       1357 ns     510717
BM_StringFindMatch1/4k           1485 ns       1486 ns     461228
BM_StringFindMatch1/32k          2235 ns       2236 ns     318253
BM_StringFindMatch2/1            1335 ns       1335 ns     517105
BM_StringFindMatch2/8            1336 ns       1337 ns     518004
BM_StringFindMatch2/64           1344 ns       1345 ns     511751
BM_StringFindMatch2/512          1361 ns       1361 ns     508150
BM_StringFindMatch2/4k           1611 ns       1611 ns     463388
BM_StringFindMatch2/32k          2187 ns       2187 ns     317532

Patch written by Aditya Kumar and Sebastian Pop.

Differential Revision: https://reviews.llvm.org/D27068

llvm-svn: 290761
This commit is contained in:
Sebastian Pop 2016-12-30 18:01:36 +00:00
parent a570a2303c
commit 9070500633
2 changed files with 95 additions and 6 deletions

View File

@ -0,0 +1,49 @@
#include <unordered_set>
#include <vector>
#include <cstdint>
#include "benchmark/benchmark_api.h"
#include "GenerateInput.hpp"
constexpr std::size_t MAX_STRING_LEN = 8 << 14;
// Benchmark when there is no match.
static void BM_StringFindNoMatch(benchmark::State &state) {
std::string s1(state.range(0), '-');
std::string s2(8, '*');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.find(s2));
}
BENCHMARK(BM_StringFindNoMatch)->Range(10, MAX_STRING_LEN);
// Benchmark when the string matches first time.
static void BM_StringFindAllMatch(benchmark::State &state) {
std::string s1(MAX_STRING_LEN, '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.find(s2));
}
BENCHMARK(BM_StringFindAllMatch)->Range(1, MAX_STRING_LEN);
// Benchmark when the string matches somewhere in the end.
static void BM_StringFindMatch1(benchmark::State &state) {
std::string s1(MAX_STRING_LEN / 2, '*');
s1 += std::string(state.range(0), '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.find(s2));
}
BENCHMARK(BM_StringFindMatch1)->Range(1, MAX_STRING_LEN / 4);
// Benchmark when the string matches somewhere from middle to the end.
static void BM_StringFindMatch2(benchmark::State &state) {
std::string s1(MAX_STRING_LEN / 2, '*');
s1 += std::string(state.range(0), '-');
s1 += std::string(state.range(0), '*');
std::string s2(state.range(0), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.find(s2));
}
BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4);
BENCHMARK_MAIN()

View File

@ -538,19 +538,59 @@ __str_find(const _CharT *__p, _SizeT __sz,
return static_cast<_SizeT>(__r - __p);
}
template <class _CharT, class _Traits>
inline _LIBCPP_CONSTEXPR_AFTER_CXX11 const _CharT *
__search_substring(const _CharT *__first1, const _CharT *__last1,
const _CharT *__first2, const _CharT *__last2) {
// Take advantage of knowing source and pattern lengths.
// Stop short when source is smaller than pattern.
const ptrdiff_t __len2 = __last2 - __first2;
if (__len2 == 0)
return __first1;
ptrdiff_t __len1 = __last1 - __first1;
if (__len1 < __len2)
return __last1;
// First element of __first2 is loop invariant.
_CharT __f2 = *__first2;
while (true) {
__len1 = __last1 - __first1;
// Check whether __first1 still has at least __len2 bytes.
if (__len1 < __len2)
return __last1;
// Find __f2 the first byte matching in __first1.
__first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2);
if (__first1 == 0)
return __last1;
// It is faster to compare from the first byte of __first1 even if we
// already know that it matches the first byte of __first2: this is because
// __first2 is most likely aligned, as it is user's "pattern" string, and
// __first1 + 1 is most likely not aligned, as the match is in the middle of
// the string.
if (_Traits::compare(__first1, __first2, __len2) == 0)
return __first1;
++__first1;
}
}
template<class _CharT, class _SizeT, class _Traits, _SizeT __npos>
inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
__str_find(const _CharT *__p, _SizeT __sz,
const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT
{
if (__pos > __sz || __sz - __pos < __n)
if (__pos > __sz)
return __npos;
if (__n == 0)
if (__n == 0) // There is nothing to search, just return __pos.
return __pos;
const _CharT* __r =
_VSTD::__search(__p + __pos, __p + __sz,
__s, __s + __n, _Traits::eq,
random_access_iterator_tag(), random_access_iterator_tag()).first;
const _CharT *__r = __search_substring<_CharT, _Traits>(
__p + __pos, __p + __sz, __s, __s + __n);
if (__r == __p + __sz)
return __npos;
return static_cast<_SizeT>(__r - __p);