From 688edc78f9763a124f851a360bf47867fb3f27e8 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Tue, 18 Apr 2017 07:17:20 +0000 Subject: [PATCH] Update google benchmark git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@300530 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/google-benchmark/AUTHORS | 3 + utils/google-benchmark/CMakeLists.txt | 28 +- utils/google-benchmark/CONTRIBUTORS | 3 + utils/google-benchmark/README.md | 66 +++- .../cmake/AddCXXCompilerFlag.cmake | 35 +- .../cmake/CXXFeatureCheck.cmake | 2 +- utils/google-benchmark/cmake/Config.cmake.in | 1 + utils/google-benchmark/docs/tools.md | 59 ++++ .../include/benchmark/benchmark_api.h | 97 ++++-- .../include/benchmark/reporter.h | 13 +- utils/google-benchmark/mingw.py | 320 ++++++++++++++++++ utils/google-benchmark/src/CMakeLists.txt | 45 ++- utils/google-benchmark/src/benchmark.cc | 37 +- .../src/benchmark_api_internal.h | 2 + .../src/benchmark_register.cc | 35 +- utils/google-benchmark/src/complexity.cc | 40 +++ .../google-benchmark/src/console_reporter.cc | 33 +- utils/google-benchmark/src/counter.cc | 68 ++++ utils/google-benchmark/src/counter.h | 26 ++ utils/google-benchmark/src/csv_reporter.cc | 58 +++- utils/google-benchmark/src/cycleclock.h | 31 +- utils/google-benchmark/src/internal_macros.h | 19 +- utils/google-benchmark/src/json_reporter.cc | 7 +- utils/google-benchmark/src/re.h | 16 +- utils/google-benchmark/src/sleep.cc | 3 +- utils/google-benchmark/src/sleep.h | 12 +- utils/google-benchmark/src/string_util.cc | 20 +- utils/google-benchmark/src/sysinfo.cc | 9 +- utils/google-benchmark/src/timers.cc | 45 ++- utils/google-benchmark/test/CMakeLists.txt | 19 ++ utils/google-benchmark/test/benchmark_test.cc | 22 +- utils/google-benchmark/test/cxx03_test.cc | 6 + .../google-benchmark/test/diagnostics_test.cc | 2 +- utils/google-benchmark/test/options_test.cc | 24 +- .../test/output_test_helper.cc | 12 +- .../test/reporter_output_test.cc | 6 +- utils/google-benchmark/tools/compare_bench.py | 58 +++- .../tools/gbench/Inputs/test1_run1.json | 14 + .../tools/gbench/Inputs/test1_run2.json | 14 + utils/google-benchmark/tools/gbench/report.py | 21 +- utils/google-benchmark/tools/gbench/util.py | 61 +++- 41 files changed, 1230 insertions(+), 162 deletions(-) create mode 100644 utils/google-benchmark/cmake/Config.cmake.in create mode 100644 utils/google-benchmark/docs/tools.md create mode 100644 utils/google-benchmark/mingw.py create mode 100644 utils/google-benchmark/src/counter.cc create mode 100644 utils/google-benchmark/src/counter.h diff --git a/utils/google-benchmark/AUTHORS b/utils/google-benchmark/AUTHORS index 5a545fa5b..c4b059df2 100644 --- a/utils/google-benchmark/AUTHORS +++ b/utils/google-benchmark/AUTHORS @@ -18,12 +18,15 @@ Eugene Zhuk Evgeny Safronov Felix Homann Google Inc. +International Business Machines Corporation Ismael Jimenez Martinez +Joao Paulo Magalhaes JianXiong Zhou Jussi Knuuttila Kaito Udagawa Lei Xu Matt Clarkson +Maxim Vafin Nick Hutchinson Oleksandr Sochka Paul Redmond diff --git a/utils/google-benchmark/CMakeLists.txt b/utils/google-benchmark/CMakeLists.txt index 8bfd21b19..1ba313319 100644 --- a/utils/google-benchmark/CMakeLists.txt +++ b/utils/google-benchmark/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8.11) +cmake_minimum_required (VERSION 2.8.12) project (benchmark) foreach(p @@ -11,8 +11,11 @@ foreach(p endforeach() option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." 
ON) +option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) +option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library" OFF) + # Make sure we can import out CMake functions list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -33,12 +36,20 @@ include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) include(CXXFeatureCheck) +if (BENCHMARK_BUILD_32_BITS) + add_required_cxx_compiler_flag(-m32) +endif() + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") # Turn compiler warnings up to 11 string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") add_definitions(-D_CRT_SECURE_NO_WARNINGS) + if (NOT BENCHMARK_ENABLE_EXCEPTIONS) + add_cxx_compiler_flag(-EHs-) + add_cxx_compiler_flag(-EHa-) + endif() # Link time optimisation if (BENCHMARK_ENABLE_LTO) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL") @@ -80,12 +91,20 @@ else() add_cxx_compiler_flag(-Wshorten-64-to-32) add_cxx_compiler_flag(-Wfloat-equal) add_cxx_compiler_flag(-fstrict-aliasing) + if (NOT BENCHMARK_ENABLE_EXCEPTIONS) + add_cxx_compiler_flag(-fno-exceptions) + endif() if (NOT BENCHMARK_USE_LIBCXX) add_cxx_compiler_flag(-Wzero-as-null-pointer-constant) endif() if (HAVE_CXX_FLAG_FSTRICT_ALIASING) - add_cxx_compiler_flag(-Wstrict-aliasing) + if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing + add_cxx_compiler_flag(-Wstrict-aliasing) + endif() endif() + # ICC17u2: overloaded virtual function "benchmark::Fixture::SetUp" is only partially overridden + # (because of deprecated overload) + add_cxx_compiler_flag(-wd654) add_cxx_compiler_flag(-Wthread-safety) if (HAVE_CXX_FLAG_WTHREAD_SAFETY) cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) @@ -162,7 +181,10 @@ cxx_feature_check(POSIX_REGEX) if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") endif() - +if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX + AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) + message(WARNING "Using std::regex with exceptions disabled is not fully supported") +endif() cxx_feature_check(STEADY_CLOCK) # Ensure we have pthreads find_package(Threads REQUIRED) diff --git a/utils/google-benchmark/CONTRIBUTORS b/utils/google-benchmark/CONTRIBUTORS index 33cd941ff..8ca4565aa 100644 --- a/utils/google-benchmark/CONTRIBUTORS +++ b/utils/google-benchmark/CONTRIBUTORS @@ -34,18 +34,21 @@ Eugene Zhuk Evgeny Safronov Felix Homann Ismael Jimenez Martinez +Joao Paulo Magalhaes JianXiong Zhou Jussi Knuuttila Kaito Udagawa Kai Wolf Lei Xu Matt Clarkson +Maxim Vafin Nick Hutchinson Oleksandr Sochka Pascal Leroy Paul Redmond Pierre Phaneuf Radoslav Yovchev +Ray Glover Shuo Chen Yusuke Suzuki Tobias Ulvgård diff --git a/utils/google-benchmark/README.md b/utils/google-benchmark/README.md index 910943098..f16a9d790 100644 --- a/utils/google-benchmark/README.md +++ b/utils/google-benchmark/README.md @@ -11,6 +11,8 @@ IRC channel: https://freenode.net #googlebenchmark [Known issues and common problems](#known-issues) +[Additional Tooling Documentation](docs/tools.md) + ## Example usage ### Basic usage Define a function that executes the code to be measured. 
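In its simplest form this looks like the following minimal sketch (`BM_StringCreation` is the README's canonical example name, shown here for context; it is not part of the hunks below):

```c++
#include <string>
#include "benchmark/benchmark_api.h"

// Measure the cost of default-constructing a std::string.
static void BM_StringCreation(benchmark::State& state) {
  while (state.KeepRunning())
    std::string empty_string;
}
// Register the function as a benchmark.
BENCHMARK(BM_StringCreation);

BENCHMARK_MAIN();
```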
@@ -363,7 +365,7 @@ static void BM_vector_push_back(benchmark::State& state) {
 }
 ```
 
-Note that `ClobberMemory()` is only available for GNU based compilers.
+Note that `ClobberMemory()` is only available for GNU or MSVC based compilers.
 
 ### Set time unit manually
 If a benchmark runs a few milliseconds it may be hard to visually compare the
@@ -430,6 +432,65 @@ BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
 /* BarTest is now registered */
 ```
 
+
+## User-defined counters
+
+You can add your own counters with user-defined names. The example below
+will add columns "Foo", "Bar" and "Baz" in its output:
+
+```c++
+static void UserCountersExample1(benchmark::State& state) {
+  double numFoos = 0, numBars = 0, numBazs = 0;
+  while (state.KeepRunning()) {
+    // ... count Foo,Bar,Baz events
+  }
+  state.counters["Foo"] = numFoos;
+  state.counters["Bar"] = numBars;
+  state.counters["Baz"] = numBazs;
+}
+```
+
+The `state.counters` object is a `std::map` with `std::string` keys
+and `Counter` values. The latter is a `double`-like class, via an implicit
+conversion to `double&`. Thus you can use all of the standard arithmetic
+assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter.
+
+In multithreaded benchmarks, each counter is set on the calling thread only.
+When the benchmark finishes, the counters from each thread will be summed;
+the resulting sum is the value which will be shown for the benchmark.
+
+The `Counter` constructor accepts two parameters: the value as a `double`
+and a bit flag which allows you to show counters as rates and/or as
+per-thread averages:
+
+```c++
+  // sets a simple counter
+  state.counters["Foo"] = numFoos;
+
+  // Set the counter as a rate. It will be presented divided
+  // by the duration of the benchmark.
+  state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate);
+
+  // Set the counter as a thread-average quantity. It will
+  // be presented divided by the number of threads.
+  state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads);
+
+  // There's also a combined flag:
+  state.counters["FooAvgRate"] = Counter(numFoos, benchmark::Counter::kAvgThreadsRate);
+```
+
+When you're compiling in C++11 mode or later you can use `insert()` with
+`std::initializer_list`:
+
+```c++
+  // With C++11, this can be done:
+  state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}});
+  // ... instead of:
+  state.counters["Foo"] = numFoos;
+  state.counters["Bar"] = numBars;
+  state.counters["Baz"] = numBazs;
+```
+
 ## Exiting Benchmarks in Error
 
 When errors caused by external influences, such as file I/O and network
@@ -501,7 +562,7 @@ The `context` attribute contains information about the run in general,
 including information about the CPU and the date.
 The `benchmarks` attribute contains a list of every benchmark run. Example
 json output looks like:
-``` json
+```json
 {
   "context": {
     "date": "2015/03/17-18:40:25",
@@ -582,6 +643,7 @@ The following minimum versions are strongly recommended to build the library:
 * GCC 4.8
 * Clang 3.4
 * Visual Studio 2013
+* Intel 2015 Update 1
 
 Anything older *may* work.
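Putting the counter flags documented above together, a complete translation unit might look like the following minimal sketch (the benchmark name `BM_ProcessChunks` and the 1 KiB chunk size are illustrative assumptions, not part of this patch):

```c++
#include "benchmark/benchmark_api.h"

static void BM_ProcessChunks(benchmark::State& state) {
  double bytes = 0;
  while (state.KeepRunning()) {
    bytes += 1024;  // pretend each iteration handles a 1 KiB chunk
  }
  // Plain counter: per-thread values are summed when the benchmark finishes.
  state.counters["Chunks"] = bytes / 1024;
  // Rate counter: the summed value is divided by the benchmark's duration.
  state.counters["BytesPerSecond"] =
      benchmark::Counter(bytes, benchmark::Counter::kIsRate);
}
BENCHMARK(BM_ProcessChunks)->Threads(2);

BENCHMARK_MAIN();
```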
diff --git a/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake b/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
index 9afde84be..0b176ba27 100644
--- a/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
+++ b/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
@@ -19,14 +19,21 @@ set(__add_cxx_compiler_flag INCLUDED)
 
 include(CheckCXXCompilerFlag)
 
-function(add_cxx_compiler_flag FLAG)
+function(mangle_compiler_flag FLAG OUTPUT)
   string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG)
   string(REPLACE "+" "X" SANITIZED_FLAG ${SANITIZED_FLAG})
   string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG})
   string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG})
-  set(CMAKE_REQUIRED_FLAGS "${FLAG}")
-  check_cxx_compiler_flag("${FLAG}" ${SANITIZED_FLAG})
-  if(${SANITIZED_FLAG})
+  set(${OUTPUT} "${SANITIZED_FLAG}" PARENT_SCOPE)
+endfunction(mangle_compiler_flag)
+
+function(add_cxx_compiler_flag FLAG)
+  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
+  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}")
+  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
+  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
+  if(${MANGLED_FLAG})
     set(VARIANT ${ARGV1})
     if(ARGV1)
       string(TOUPPER "_${VARIANT}" VARIANT)
@@ -35,3 +42,23 @@ function(add_cxx_compiler_flag FLAG)
   endif()
 endfunction()
 
+function(add_required_cxx_compiler_flag FLAG)
+  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
+  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}")
+  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
+  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
+  if(${MANGLED_FLAG})
+    set(VARIANT ${ARGV1})
+    if(ARGV1)
+      string(TOUPPER "_${VARIANT}" VARIANT)
+    endif()
+    set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
+    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
+    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}" PARENT_SCOPE)
+  else()
+    message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler")
+  endif()
+endfunction()
diff --git a/utils/google-benchmark/cmake/CXXFeatureCheck.cmake b/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
index b106f32b6..6efe6a84e 100644
--- a/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
+++ b/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
@@ -10,7 +10,7 @@
 #
 # include(CXXFeatureCheck)
 # cxx_feature_check(STD_REGEX)
-# Requires CMake 2.6+
+# Requires CMake 2.8.12+
 
 if(__cxx_feature_check)
   return()
diff --git a/utils/google-benchmark/cmake/Config.cmake.in b/utils/google-benchmark/cmake/Config.cmake.in
new file mode 100644
index 000000000..6e9256eea
--- /dev/null
+++ b/utils/google-benchmark/cmake/Config.cmake.in
@@ -0,0 +1 @@
+include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")
diff --git a/utils/google-benchmark/docs/tools.md b/utils/google-benchmark/docs/tools.md
new file mode 100644
index 000000000..f176f74a4
--- /dev/null
+++ b/utils/google-benchmark/docs/tools.md
@@ -0,0 +1,59 @@
+# Benchmark Tools
+
+## compare_bench.py
+
+The `compare_bench.py` utility can be used to compare the results of benchmarks.
+The program is invoked like:
+
+``` bash
+$ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]...
+```
+
+Where `<old-benchmark>` and `<new-benchmark>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+The sample output using the JSON test files under `Inputs/` gives:
+
+``` bash
+$ ./compare_bench.py ./gbench/Inputs/test1_run1.json ./gbench/Inputs/test1_run2.json
+Comparing ./gbench/Inputs/test1_run1.json to ./gbench/Inputs/test1_run2.json
+Benchmark                   Time           CPU
+----------------------------------------------
+BM_SameTimes               +0.00         +0.00
+BM_2xFaster                -0.50         -0.50
+BM_2xSlower                +1.00         +1.00
+BM_10PercentFaster         -0.10         -0.10
+BM_10PercentSlower         +0.10         +0.10
+```
+
+When a benchmark executable is run, the raw output from the benchmark is printed in real time to stdout. The sample output using `benchmark/basic_test` for both arguments looks like:
+
+```
+./compare_bench.py test/basic_test test/basic_test --benchmark_filter=BM_empty.*
+RUNNING: test/basic_test --benchmark_filter=BM_empty.*
+Run on (4 X 4228.32 MHz CPU s)
+2016-08-02 19:21:33
+Benchmark                              Time           CPU Iterations
+--------------------------------------------------------------------
+BM_empty                               9 ns          9 ns   79545455
+BM_empty/threads:4                     4 ns          9 ns   75268816
+BM_empty_stop_start                    8 ns          8 ns   83333333
+BM_empty_stop_start/threads:4          3 ns          8 ns   83333332
+RUNNING: test/basic_test --benchmark_filter=BM_empty.*
+Run on (4 X 4228.32 MHz CPU s)
+2016-08-02 19:21:35
+Benchmark                              Time           CPU Iterations
+--------------------------------------------------------------------
+BM_empty                               9 ns          9 ns   76086957
+BM_empty/threads:4                     4 ns          9 ns   76086956
+BM_empty_stop_start                    8 ns          8 ns   87500000
+BM_empty_stop_start/threads:4          3 ns          8 ns   88607596
+Comparing test/basic_test to test/basic_test
+Benchmark                                  Time           CPU
+---------------------------------------------------------
+BM_empty                                  +0.00         +0.00
+BM_empty/threads:4                        +0.00         +0.00
+BM_empty_stop_start                       +0.00         +0.00
+BM_empty_stop_start/threads:4             +0.00         +0.00
+```
+
+Obviously this example doesn't give any useful output, but it's intended to show the output format when 'compare_bench.py' needs to run benchmarks.
diff --git a/utils/google-benchmark/include/benchmark/benchmark_api.h b/utils/google-benchmark/include/benchmark/benchmark_api.h
index 28baa587a..1e853e2cd 100644
--- a/utils/google-benchmark/include/benchmark/benchmark_api.h
+++ b/utils/google-benchmark/include/benchmark/benchmark_api.h
@@ -155,19 +155,29 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 
 #include <stdint.h>
 #include <stddef.h>
+#include <map>
 
 #include "macros.h"
 
 #if defined(BENCHMARK_HAS_CXX11)
 #include <type_traits>
+#include <initializer_list>
 #include <utility>
 #endif
 
+#if defined(_MSC_VER)
+#include <intrin.h> // for _ReadWriteBarrier
+#endif
+
 namespace benchmark {
 class BenchmarkReporter;
 
 void Initialize(int* argc, char** argv);
 
+// Report to stdout all arguments in 'argv' as unrecognized except the first.
+// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
+bool ReportUnrecognizedArguments(int argc, char** argv);
+
 // Generate a list of benchmarks matching the specified --benchmark_filter flag
 // and if --benchmark_list_tests is specified return after printing the name
 // of each matching benchmark. Otherwise run each matching benchmark and
@@ -197,19 +207,6 @@ class Benchmark;
 class BenchmarkImp;
 class BenchmarkFamilies;
 
-template <class T>
-struct Voider {
-  typedef void type;
-};
-
-template <class T, class = void>
-struct EnableIfString {};
-
-template <class T>
-struct EnableIfString<T, typename Voider<typename T::basic_string>::type> {
-  typedef int type;
-};
-
 void UseCharPointer(char const volatile*);
 
 // Take ownership of the pointer and register the benchmark. Return the
@@ -222,11 +219,16 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
 
 }  // end namespace internal
 
+
+#if !defined(__GNUC__) || defined(__pnacl__) || defined(EMSCRIPTEN)
+# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
+#endif
+
 // The DoNotOptimize(...) function can be used to prevent a value or
 // expression from being optimized away by the compiler. This function is
 // intended to add little to no overhead.
 // See: https://youtu.be/nXaxk27zwlk?t=2441
-#if defined(__GNUC__)
+#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
   asm volatile("" : : "g"(value) : "memory");
@@ -236,14 +238,57 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
   asm volatile("" : : : "memory");
 }
+#elif defined(_MSC_VER)
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+  _ReadWriteBarrier();
+}
+
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
+  _ReadWriteBarrier();
+}
 #else
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
 }
-// FIXME Add ClobberMemory() for non-gnu compilers
+// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
 #endif
+
+
+// This class is used for user-defined counters.
+class Counter {
+public:
+
+  enum Flags {
+    kDefaults = 0,
+    // Mark the counter as a rate. It will be presented divided
+    // by the duration of the benchmark.
+    kIsRate = 1,
+    // Mark the counter as a thread-average quantity. It will be
+    // presented divided by the number of threads.
+    kAvgThreads = 2,
+    // Mark the counter as a thread-average rate. See above.
+    kAvgThreadsRate = kIsRate|kAvgThreads
+  };
+
+  double value;
+  Flags flags;
+
+  BENCHMARK_ALWAYS_INLINE
+  Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}
+
+  BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
+  BENCHMARK_ALWAYS_INLINE operator double & () { return value; }
+
+};
+
+// This is the container for the user-defined counters.
+typedef std::map<std::string, Counter> UserCounters;
+
+
 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
 // for the measured time.
 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
@@ -393,13 +438,7 @@ class State {
   // REQUIRES: a benchmark has exited its KeepRunning loop.
   void SetLabel(const char* label);
 
-  // Allow the use of std::string without actually including <string>.
-  // This function does not participate in overload resolution unless StringType
-  // has the nested typename `basic_string`. This typename should be provided
-  // as an injected class name in the case of std::string.
-  template <class StringType>
-  void SetLabel(StringType const& str,
-                typename internal::EnableIfString<StringType>::type = 1) {
+  void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
     this->SetLabel(str.c_str());
   }
 
@@ -434,6 +473,8 @@ class State {
   bool error_occurred_;
 
  public:
+  // Container for user-defined counters.
+  UserCounters counters;
   // Index of the executing thread.
Values from [0, threads). const int thread_index; // Number of threads concurrently executing the benchmark. @@ -536,9 +577,17 @@ class Benchmark { // Set the minimum amount of time to use when running this benchmark. This // option overrides the `benchmark_min_time` flag. - // REQUIRES: `t > 0` + // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Specify the amount of iterations that should be run by this benchmark. + // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. + // + // NOTE: This function should only be used when *exact* iteration control is + // needed and never to control or limit how long a benchmark runs, where + // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. + Benchmark* Iterations(size_t n); + // Specify the amount of times to repeat this benchmark. This option overrides // the `benchmark_repetitions` flag. // REQUIRES: `n > 0` @@ -627,6 +676,7 @@ class Benchmark { TimeUnit time_unit_; int range_multiplier_; double min_time_; + size_t iterations_; int repetitions_; bool use_real_time_; bool use_manual_time_; @@ -858,6 +908,7 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_MAIN() \ int main(int argc, char** argv) { \ ::benchmark::Initialize(&argc, argv); \ + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ } diff --git a/utils/google-benchmark/include/benchmark/reporter.h b/utils/google-benchmark/include/benchmark/reporter.h index 8c39e7f7f..789124ba8 100644 --- a/utils/google-benchmark/include/benchmark/reporter.h +++ b/utils/google-benchmark/include/benchmark/reporter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "benchmark_api.h" // For forward declaration of BenchmarkReporter @@ -54,7 +55,8 @@ class BenchmarkReporter { complexity_lambda(), complexity_n(0), report_big_o(false), - report_rms(false) {} + report_rms(false), + counters() {} std::string benchmark_name; std::string report_label; // Empty if not set by benchmark. @@ -93,6 +95,8 @@ class BenchmarkReporter { // Inform print function whether the current run is a complexity report bool report_big_o; bool report_rms; + + UserCounters counters; }; // Construct a BenchmarkReporter with the output stream set to 'std::cout' @@ -163,7 +167,10 @@ class ConsoleReporter : public BenchmarkReporter { protected: virtual void PrintRunData(const Run& report); + virtual void PrintHeader(const Run& report); + size_t name_field_width_; + bool printed_header_; private: bool color_output_; @@ -184,11 +191,15 @@ class JSONReporter : public BenchmarkReporter { class CSVReporter : public BenchmarkReporter { public: + CSVReporter() : printed_header_(false) {} virtual bool ReportContext(const Context& context); virtual void ReportRuns(const std::vector& reports); private: void PrintRunData(const Run& report); + + bool printed_header_; + std::set< std::string > user_counter_names_; }; inline const char* GetTimeUnitString(TimeUnit unit) { diff --git a/utils/google-benchmark/mingw.py b/utils/google-benchmark/mingw.py new file mode 100644 index 000000000..706ad559d --- /dev/null +++ b/utils/google-benchmark/mingw.py @@ -0,0 +1,320 @@ +#! 
/usr/bin/env python +# encoding: utf-8 + +import argparse +import errno +import logging +import os +import platform +import re +import sys +import subprocess +import tempfile + +try: + import winreg +except ImportError: + import _winreg as winreg +try: + import urllib.request as request +except ImportError: + import urllib as request +try: + import urllib.parse as parse +except ImportError: + import urlparse as parse + +class EmptyLogger(object): + ''' + Provides an implementation that performs no logging + ''' + def debug(self, *k, **kw): + pass + def info(self, *k, **kw): + pass + def warn(self, *k, **kw): + pass + def error(self, *k, **kw): + pass + def critical(self, *k, **kw): + pass + def setLevel(self, *k, **kw): + pass + +urls = ( + 'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20' + 'targetting%20Win32/Personal%20Builds/mingw-builds/installer/' + 'repository.txt', + 'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/' + 'repository.txt' +) +''' +A list of mingw-build repositories +''' + +def repository(urls = urls, log = EmptyLogger()): + ''' + Downloads and parse mingw-build repository files and parses them + ''' + log.info('getting mingw-builds repository') + versions = {} + re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files') + re_sub = r'http://downloads.sourceforge.net/project/\1' + for url in urls: + log.debug(' - requesting: %s', url) + socket = request.urlopen(url) + repo = socket.read() + if not isinstance(repo, str): + repo = repo.decode(); + socket.close() + for entry in repo.split('\n')[:-1]: + value = entry.split('|') + version = tuple([int(n) for n in value[0].strip().split('.')]) + version = versions.setdefault(version, {}) + arch = value[1].strip() + if arch == 'x32': + arch = 'i686' + elif arch == 'x64': + arch = 'x86_64' + arch = version.setdefault(arch, {}) + threading = arch.setdefault(value[2].strip(), {}) + exceptions = threading.setdefault(value[3].strip(), {}) + revision = exceptions.setdefault(int(value[4].strip()[3:]), + re_sourceforge.sub(re_sub, value[5].strip())) + return versions + +def find_in_path(file, path=None): + ''' + Attempts to find an executable in the path + ''' + if platform.system() == 'Windows': + file += '.exe' + if path is None: + path = os.environ.get('PATH', '') + if type(path) is type(''): + path = path.split(os.pathsep) + return list(filter(os.path.exists, + map(lambda dir, file=file: os.path.join(dir, file), path))) + +def find_7zip(log = EmptyLogger()): + ''' + Attempts to find 7zip for unpacking the mingw-build archives + ''' + log.info('finding 7zip') + path = find_in_path('7z') + if not path: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip') + path, _ = winreg.QueryValueEx(key, 'Path') + path = [os.path.join(path, '7z.exe')] + log.debug('found \'%s\'', path[0]) + return path[0] + +find_7zip() + +def unpack(archive, location, log = EmptyLogger()): + ''' + Unpacks a mingw-builds archive + ''' + sevenzip = find_7zip(log) + log.info('unpacking %s', os.path.basename(archive)) + cmd = [sevenzip, 'x', archive, '-o' + location, '-y'] + log.debug(' - %r', cmd) + with open(os.devnull, 'w') as devnull: + subprocess.check_call(cmd, stdout = devnull) + +def download(url, location, log = EmptyLogger()): + ''' + Downloads and unpacks a mingw-builds archive + ''' + log.info('downloading MinGW') + log.debug(' - url: %s', url) + log.debug(' - location: %s', location) + + re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*') + + stream = 
request.urlopen(url) + try: + content = stream.getheader('Content-Disposition') or '' + except AttributeError: + content = stream.headers.getheader('Content-Disposition') or '' + matches = re_content.match(content) + if matches: + filename = matches.group(2) + else: + parsed = parse.urlparse(stream.geturl()) + filename = os.path.basename(parsed.path) + + try: + os.makedirs(location) + except OSError as e: + if e.errno == errno.EEXIST and os.path.isdir(location): + pass + else: + raise + + archive = os.path.join(location, filename) + with open(archive, 'wb') as out: + while True: + buf = stream.read(1024) + if not buf: + break + out.write(buf) + unpack(archive, location, log = log) + os.remove(archive) + + possible = os.path.join(location, 'mingw64') + if not os.path.exists(possible): + possible = os.path.join(location, 'mingw32') + if not os.path.exists(possible): + raise ValueError('Failed to find unpacked MinGW: ' + possible) + return possible + +def root(location = None, arch = None, version = None, threading = None, + exceptions = None, revision = None, log = EmptyLogger()): + ''' + Returns the root folder of a specific version of the mingw-builds variant + of gcc. Will download the compiler if needed + ''' + + # Get the repository if we don't have all the information + if not (arch and version and threading and exceptions and revision): + versions = repository(log = log) + + # Determine some defaults + version = version or max(versions.keys()) + if not arch: + arch = platform.machine().lower() + if arch == 'x86': + arch = 'i686' + elif arch == 'amd64': + arch = 'x86_64' + if not threading: + keys = versions[version][arch].keys() + if 'posix' in keys: + threading = 'posix' + elif 'win32' in keys: + threading = 'win32' + else: + threading = keys[0] + if not exceptions: + keys = versions[version][arch][threading].keys() + if 'seh' in keys: + exceptions = 'seh' + elif 'sjlj' in keys: + exceptions = 'sjlj' + else: + exceptions = keys[0] + if revision == None: + revision = max(versions[version][arch][threading][exceptions].keys()) + if not location: + location = os.path.join(tempfile.gettempdir(), 'mingw-builds') + + # Get the download url + url = versions[version][arch][threading][exceptions][revision] + + # Tell the user whatzzup + log.info('finding MinGW %s', '.'.join(str(v) for v in version)) + log.debug(' - arch: %s', arch) + log.debug(' - threading: %s', threading) + log.debug(' - exceptions: %s', exceptions) + log.debug(' - revision: %s', revision) + log.debug(' - url: %s', url) + + # Store each specific revision differently + slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}' + slug = slug.format( + version = '.'.join(str(v) for v in version), + arch = arch, + threading = threading, + exceptions = exceptions, + revision = revision + ) + if arch == 'x86_64': + root_dir = os.path.join(location, slug, 'mingw64') + elif arch == 'i686': + root_dir = os.path.join(location, slug, 'mingw32') + else: + raise ValueError('Unknown MinGW arch: ' + arch) + + # Download if needed + if not os.path.exists(root_dir): + downloaded = download(url, os.path.join(location, slug), log = log) + if downloaded != root_dir: + raise ValueError('The location of mingw did not match\n%s\n%s' + % (downloaded, root_dir)) + + return root_dir + +def str2ver(string): + ''' + Converts a version string into a tuple + ''' + try: + version = tuple(int(v) for v in string.split('.')) + if len(version) is not 3: + raise ValueError() + except ValueError: + raise argparse.ArgumentTypeError( + 'please provide a 
three digit version string') + return version + +def main(): + ''' + Invoked when the script is run directly by the python interpreter + ''' + parser = argparse.ArgumentParser( + description = 'Downloads a specific version of MinGW', + formatter_class = argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('--location', + help = 'the location to download the compiler to', + default = os.path.join(tempfile.gettempdir(), 'mingw-builds')) + parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'], + help = 'the target MinGW architecture string') + parser.add_argument('--version', type = str2ver, + help = 'the version of GCC to download') + parser.add_argument('--threading', choices = ['posix', 'win32'], + help = 'the threading type of the compiler') + parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'], + help = 'the method to throw exceptions') + parser.add_argument('--revision', type=int, + help = 'the revision of the MinGW release') + group = parser.add_mutually_exclusive_group() + group.add_argument('-v', '--verbose', action='store_true', + help='increase the script output verbosity') + group.add_argument('-q', '--quiet', action='store_true', + help='only print errors and warning') + args = parser.parse_args() + + # Create the logger + logger = logging.getLogger('mingw') + handler = logging.StreamHandler() + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + if args.quiet: + logger.setLevel(logging.WARN) + if args.verbose: + logger.setLevel(logging.DEBUG) + + # Get MinGW + root_dir = root(location = args.location, arch = args.arch, + version = args.version, threading = args.threading, + exceptions = args.exceptions, revision = args.revision, + log = logger) + + sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin')) + +if __name__ == '__main__': + try: + main() + except IOError as e: + sys.stderr.write('IO error: %s\n' % e) + sys.exit(1) + except OSError as e: + sys.stderr.write('OS error: %s\n' % e) + sys.exit(1) + except KeyboardInterrupt as e: + sys.stderr.write('Killed\n') + sys.exit(1) diff --git a/utils/google-benchmark/src/CMakeLists.txt b/utils/google-benchmark/src/CMakeLists.txt index 403887517..770777393 100644 --- a/utils/google-benchmark/src/CMakeLists.txt +++ b/utils/google-benchmark/src/CMakeLists.txt @@ -21,24 +21,55 @@ set_target_properties(benchmark PROPERTIES # Link threads. 
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) +find_library(LIBRT rt) +if(LIBRT) + target_link_libraries(benchmark ${LIBRT}) +endif() # We need extra libraries on Windows if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") target_link_libraries(benchmark Shlwapi) endif() -# Expose public API -target_include_directories(benchmark PUBLIC ${PROJECT_SOURCE_DIR}/include) +set(include_install_dir "include") +set(lib_install_dir "lib/") +set(bin_install_dir "bin/") +set(config_install_dir "lib/cmake/${PROJECT_NAME}") + +set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") + +set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") +set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") +set(targets_export_name "${PROJECT_NAME}Targets") + +set(namespace "${PROJECT_NAME}::") + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + "${version_config}" VERSION ${GIT_VERSION} COMPATIBILITY SameMajorVersion +) + +configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) install( TARGETS benchmark - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - COMPONENT library) + EXPORT ${targets_export_name} + ARCHIVE DESTINATION ${lib_install_dir} + LIBRARY DESTINATION ${lib_install_dir} + RUNTIME DESTINATION ${bin_install_dir} + INCLUDES DESTINATION ${include_install_dir}) install( DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" - DESTINATION include + DESTINATION ${include_install_dir} FILES_MATCHING PATTERN "*.*h") + +install( + FILES "${project_config}" "${version_config}" + DESTINATION "${config_install_dir}") + +install( + EXPORT "${targets_export_name}" + NAMESPACE "${namespace}" + DESTINATION "${config_install_dir}") diff --git a/utils/google-benchmark/src/benchmark.cc b/utils/google-benchmark/src/benchmark.cc index 95f6a25b7..00ffa07ff 100644 --- a/utils/google-benchmark/src/benchmark.cc +++ b/utils/google-benchmark/src/benchmark.cc @@ -37,6 +37,7 @@ #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" +#include "counter.h" #include "log.h" #include "mutex.h" #include "re.h" @@ -145,6 +146,7 @@ class ThreadManager { std::string report_label_; std::string error_message_; bool has_error_ = false; + UserCounters counters; }; GUARDED_BY(GetBenchmarkMutex()) Result results; @@ -249,6 +251,7 @@ BenchmarkReporter::Run CreateRunReport( report.complexity_n = results.complexity_n; report.complexity = b.complexity; report.complexity_lambda = b.complexity_lambda; + report.counters = results.counters; } return report; } @@ -272,6 +275,7 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, results.bytes_processed += st.bytes_processed(); results.items_processed += st.items_processed(); results.complexity_n += st.complexity_length_n(); + internal::Increment(&results.counters, st.counters); } manager->NotifyThreadComplete(); } @@ -281,7 +285,8 @@ std::vector RunBenchmark( std::vector* complexity_reports) { std::vector reports; // return value - size_t iters = 1; + const bool has_explicit_iteration_count = b.iterations != 0; + size_t iters = has_explicit_iteration_count ? b.iterations : 1; std::unique_ptr manager; std::vector pool(b.threads - 1); const int repeats = @@ -291,7 +296,7 @@ std::vector RunBenchmark( (b.report_mode == internal::RM_Unspecified ? 
FLAGS_benchmark_report_aggregates_only
           : b.report_mode == internal::RM_ReportAggregatesOnly);
 
-  for (int i = 0; i < repeats; i++) {
+  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
     for (;;) {
       // Try benchmark
       VLOG(2) << "Running " << b.name << " for " << iters << "\n";
@@ -327,10 +332,20 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
 
       const double min_time =
           !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
-      // If this was the first run, was elapsed time or cpu time large enough?
-      // If this is not the first run, go with the current value of iter.
-      if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) ||
-          (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) {
+
+      // Determine if this run should be reported; either it has
+      // run for a sufficient amount of time or an error was reported.
+      const bool should_report = repetition_num > 0
+        || has_explicit_iteration_count // An exact iteration count was requested
+        || results.has_error_
+        || iters >= kMaxIterations
+        || seconds >= min_time // the elapsed time is large enough
+        // CPU time is specified but the elapsed real time greatly exceeds the
+        // minimum time. Note that user provided timers are exempt from this
+        // sanity check.
+        || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
+
+      if (should_report) {
         BenchmarkReporter::Run report =
             CreateRunReport(b, results, iters, seconds);
         if (!report.error_occurred && b.complexity != oNone)
@@ -386,6 +401,7 @@ State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
       items_processed_(0),
       complexity_n_(0),
       error_occurred_(false),
+      counters(),
       thread_index(thread_i),
       threads(n_threads),
       max_iterations(max_iters),
@@ -634,7 +650,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
         // TODO: Remove this.
ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { - for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1]; + for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; --(*argc); --i; @@ -664,4 +680,11 @@ void Initialize(int* argc, char** argv) { internal::LogLevel() = FLAGS_v; } +bool ReportUnrecognizedArguments(int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]); + } + return argc > 1; +} + } // end namespace benchmark diff --git a/utils/google-benchmark/src/benchmark_api_internal.h b/utils/google-benchmark/src/benchmark_api_internal.h index 8b97ce600..828ed1216 100644 --- a/utils/google-benchmark/src/benchmark_api_internal.h +++ b/utils/google-benchmark/src/benchmark_api_internal.h @@ -24,9 +24,11 @@ struct Benchmark::Instance { bool use_manual_time; BigO complexity; BigOFunc* complexity_lambda; + UserCounters counters; bool last_benchmark_instance; int repetitions; double min_time; + size_t iterations; int threads; // Number of concurrent threads to us }; diff --git a/utils/google-benchmark/src/benchmark_register.cc b/utils/google-benchmark/src/benchmark_register.cc index 4e580d8ea..fe3732041 100644 --- a/utils/google-benchmark/src/benchmark_register.cc +++ b/utils/google-benchmark/src/benchmark_register.cc @@ -143,6 +143,7 @@ bool BenchmarkFamilies::FindBenchmarks( instance.time_unit = family->time_unit_; instance.range_multiplier = family->range_multiplier_; instance.min_time = family->min_time_; + instance.iterations = family->iterations_; instance.repetitions = family->repetitions_; instance.use_real_time = family->use_real_time_; instance.use_manual_time = family->use_manual_time_; @@ -163,16 +164,17 @@ bool BenchmarkFamilies::FindBenchmarks( } } - AppendHumanReadable(arg, &instance.name); + instance.name += std::to_string(arg); ++arg_i; } - if (!IsZero(family->min_time_)) { + if (!IsZero(family->min_time_)) instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); - } - if (family->repetitions_ != 0) { + if (family->iterations_ != 0) + instance.name += StringPrintF("/iterations:%d", family->iterations_); + if (family->repetitions_ != 0) instance.name += StringPrintF("/repeats:%d", family->repetitions_); - } + if (family->use_manual_time_) { instance.name += "/manual_time"; } else if (family->use_real_time_) { @@ -219,6 +221,7 @@ Benchmark::Benchmark(const char* name) time_unit_(kNanosecond), range_multiplier_(kRangeMultiplier), min_time_(0), + iterations_(0), repetitions_(0), use_real_time_(false), use_manual_time_(false), @@ -344,6 +347,22 @@ Benchmark* Benchmark::RangeMultiplier(int multiplier) { return this; } + +Benchmark* Benchmark::MinTime(double t) { + CHECK(t > 0.0); + CHECK(iterations_ == 0); + min_time_ = t; + return this; +} + + +Benchmark* Benchmark::Iterations(size_t n) { + CHECK(n > 0); + CHECK(IsZero(min_time_)); + iterations_ = n; + return this; +} + Benchmark* Benchmark::Repetitions(int n) { CHECK(n > 0); repetitions_ = n; @@ -355,12 +374,6 @@ Benchmark* Benchmark::ReportAggregatesOnly(bool value) { return this; } -Benchmark* Benchmark::MinTime(double t) { - CHECK(t > 0.0); - min_time_ = t; - return this; -} - Benchmark* Benchmark::UseRealTime() { CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; diff --git a/utils/google-benchmark/src/complexity.cc b/utils/google-benchmark/src/complexity.cc index dfab791a3..02adbef62 100644 --- 
a/utils/google-benchmark/src/complexity.cc +++ b/utils/google-benchmark/src/complexity.cc @@ -171,6 +171,22 @@ std::vector ComputeStats( // All repetitions should be run with the same number of iterations so we // can take this information from the first benchmark. int64_t const run_iterations = reports.front().iterations; + // create stats for user counters + struct CounterStat { + Counter c; + Stat1_d s; + }; + std::map< std::string, CounterStat > counter_stats; + for(Run const& r : reports) { + for(auto const& cnt : r.counters) { + auto it = counter_stats.find(cnt.first); + if(it == counter_stats.end()) { + counter_stats.insert({cnt.first, {cnt.second, Stat1_d{}}}); + } else { + CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + } + } + } // Populate the accumulators. for (Run const& run : reports) { @@ -183,6 +199,12 @@ std::vector ComputeStats( Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations); items_per_second_stat += Stat1_d(run.items_per_second, run.iterations); bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations); + // user counters + for(auto const& cnt : run.counters) { + auto it = counter_stats.find(cnt.first); + CHECK_NE(it, counter_stats.end()); + it->second.s += Stat1_d(cnt.second, run.iterations); + } } // Get the data from the accumulator to BenchmarkReporter::Run's. @@ -196,6 +218,11 @@ std::vector ComputeStats( mean_data.bytes_per_second = bytes_per_second_stat.Mean(); mean_data.items_per_second = items_per_second_stat.Mean(); mean_data.time_unit = reports[0].time_unit; + // user counters + for(auto const& kv : counter_stats) { + auto c = Counter(kv.second.s.Mean(), counter_stats[kv.first].c.flags); + mean_data.counters[kv.first] = c; + } // Only add label to mean/stddev if it is same for all runs mean_data.report_label = reports[0].report_label; @@ -215,6 +242,11 @@ std::vector ComputeStats( stddev_data.bytes_per_second = bytes_per_second_stat.StdDev(); stddev_data.items_per_second = items_per_second_stat.StdDev(); stddev_data.time_unit = reports[0].time_unit; + // user counters + for(auto const& kv : counter_stats) { + auto c = Counter(kv.second.s.StdDev(), counter_stats[kv.first].c.flags); + stddev_data.counters[kv.first] = c; + } results.push_back(mean_data); results.push_back(stddev_data); @@ -263,6 +295,11 @@ std::vector ComputeBigO( big_o.report_big_o = true; big_o.complexity = result_cpu.complexity; + // All the time results are reported after being multiplied by the + // time unit multiplier. But since RMS is a relative quantity it + // should not be multiplied at all. So, here, we _divide_ it by the + // multiplier so that when it is multiplied later the result is the + // correct one. double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); // Only add label to mean/stddev if it is same for all runs @@ -275,6 +312,9 @@ std::vector ComputeBigO( rms.cpu_accumulated_time = result_cpu.rms / multiplier; rms.report_rms = true; rms.complexity = result_cpu.complexity; + // don't forget to keep the time unit, or we won't be able to + // recover the correct value. 
+ rms.time_unit = reports[0].time_unit; results.push_back(big_o); results.push_back(rms); diff --git a/utils/google-benchmark/src/console_reporter.cc b/utils/google-benchmark/src/console_reporter.cc index 7e0cca3e3..3f3de0294 100644 --- a/utils/google-benchmark/src/console_reporter.cc +++ b/utils/google-benchmark/src/console_reporter.cc @@ -14,6 +14,7 @@ #include "benchmark/reporter.h" #include "complexity.h" +#include "counter.h" #include #include @@ -34,6 +35,7 @@ namespace benchmark { bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; + printed_header_ = false; PrintBasicContext(&GetErrorStream(), context); @@ -45,16 +47,32 @@ bool ConsoleReporter::ReportContext(const Context& context) { color_output_ = false; } #endif - std::string str = - FormatString("%-*s %13s %13s %10s\n", static_cast(name_field_width_), - "Benchmark", "Time", "CPU", "Iterations"); - GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n"; return true; } +void ConsoleReporter::PrintHeader(const Run& run) { + std::string str = + FormatString("%-*s %13s %13s %10s\n", static_cast(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); + if(!run.counters.empty()) { + str += " UserCounters..."; + } + std::string line = std::string(str.length(), '-'); + GetOutputStream() << line << "\n" << str << line << "\n"; +} + void ConsoleReporter::ReportRuns(const std::vector& reports) { - for (const auto& run : reports) PrintRunData(run); + for (const auto& run : reports) { + // print the header if none was printed yet + if (!printed_header_) { + printed_header_ = true; + PrintHeader(run); + } + // As an alternative to printing the headers like this, we could sort + // the benchmarks by header and then print like that. + PrintRunData(run); + } } static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, @@ -114,6 +132,11 @@ void ConsoleReporter::PrintRunData(const Run& result) { printer(Out, COLOR_CYAN, "%10lld", result.iterations); } + for (auto& c : result.counters) { + auto const& s = HumanReadableNumber(c.second.value); + printer(Out, COLOR_DEFAULT, " %s=%s", c.first.c_str(), s.c_str()); + } + if (!rate.empty()) { printer(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str()); } diff --git a/utils/google-benchmark/src/counter.cc b/utils/google-benchmark/src/counter.cc new file mode 100644 index 000000000..307863d3c --- /dev/null +++ b/utils/google-benchmark/src/counter.cc @@ -0,0 +1,68 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "counter.h" + +namespace benchmark { +namespace internal { + +double Finish(Counter const& c, double cpu_time, double num_threads) { + double v = c.value; + if (c.flags & Counter::kIsRate) { + v /= cpu_time; + } + if (c.flags & Counter::kAvgThreads) { + v /= num_threads; + } + return v; +} + +void Finish(UserCounters *l, double cpu_time, double num_threads) { + for (auto &c : *l) { + c.second = Finish(c.second, cpu_time, num_threads); + } +} + +void Increment(UserCounters *l, UserCounters const& r) { + // add counters present in both or just in *l + for (auto &c : *l) { + auto it = r.find(c.first); + if (it != r.end()) { + c.second = c.second + it->second; + } + } + // add counters present in r, but not in *l + for (auto const &tc : r) { + auto it = l->find(tc.first); + if (it == l->end()) { + (*l)[tc.first] = tc.second; + } + } +} + +bool SameNames(UserCounters const& l, UserCounters const& r) { + if (&l == &r) return true; + if (l.size() != r.size()) { + return false; + } + for (auto const& c : l) { + if ( r.find(c.first) == r.end()) { + return false; + } + } + return true; +} + +} // end namespace internal +} // end namespace benchmark diff --git a/utils/google-benchmark/src/counter.h b/utils/google-benchmark/src/counter.h new file mode 100644 index 000000000..bbb92d9a2 --- /dev/null +++ b/utils/google-benchmark/src/counter.h @@ -0,0 +1,26 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark_api.h" + +namespace benchmark { + +// these counter-related functions are hidden to reduce API surface. +namespace internal { +void Finish(UserCounters *l, double time, double num_threads); +void Increment(UserCounters *l, UserCounters const& r); +bool SameNames(UserCounters const& l, UserCounters const& r); +} // end namespace internal + +} //end namespace benchmark diff --git a/utils/google-benchmark/src/csv_reporter.cc b/utils/google-benchmark/src/csv_reporter.cc index 18ab3b668..6779815b3 100644 --- a/utils/google-benchmark/src/csv_reporter.cc +++ b/utils/google-benchmark/src/csv_reporter.cc @@ -24,6 +24,7 @@ #include "string_util.h" #include "timers.h" +#include "check.h" // File format reference: http://edoceo.com/utilitas/csv-file-format. 
@@ -38,21 +39,51 @@ std::vector elements = { bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); - - std::ostream& Out = GetOutputStream(); - for (auto B = elements.begin(); B != elements.end();) { - Out << *B++; - if (B != elements.end()) Out << ","; - } - Out << "\n"; return true; } -void CSVReporter::ReportRuns(const std::vector& reports) { - for (const auto& run : reports) PrintRunData(run); +void CSVReporter::ReportRuns(const std::vector & reports) { + std::ostream& Out = GetOutputStream(); + + if (!printed_header_) { + // save the names of all the user counters + for (const auto& run : reports) { + for (const auto& cnt : run.counters) { + user_counter_names_.insert(cnt.first); + } + } + + // print the header + for (auto B = elements.begin(); B != elements.end();) { + Out << *B++; + if (B != elements.end()) Out << ","; + } + for (auto B = user_counter_names_.begin(); B != user_counter_names_.end();) { + Out << ",\"" << *B++ << "\""; + } + Out << "\n"; + + printed_header_ = true; + } else { + // check that all the current counters are saved in the name set + for (const auto& run : reports) { + for (const auto& cnt : run.counters) { + CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) + << "All counters must be present in each run. " + << "Counter named \"" << cnt.first + << "\" was not in a run after being added to the header"; + } + } + } + + // print results for each run + for (const auto& run : reports) { + PrintRunData(run); + } + } -void CSVReporter::PrintRunData(const Run& run) { +void CSVReporter::PrintRunData(const Run & run) { std::ostream& Out = GetOutputStream(); // Field with embedded double-quote characters must be doubled and the field @@ -102,6 +133,13 @@ void CSVReporter::PrintRunData(const Run& run) { Out << "\"" << label << "\""; } Out << ",,"; // for error_occurred and error_message + + // Print user counters + for (const auto &ucn : user_counter_names_) { + auto it = run.counters.find(ucn); + CHECK(it != run.counters.end()); + Out << "," << it->second; + } Out << '\n'; } diff --git a/utils/google-benchmark/src/cycleclock.h b/utils/google-benchmark/src/cycleclock.h index e4825d4ba..e0f9b01f9 100644 --- a/utils/google-benchmark/src/cycleclock.h +++ b/utils/google-benchmark/src/cycleclock.h @@ -43,6 +43,11 @@ extern "C" uint64_t __rdtsc(); #ifndef BENCHMARK_OS_WINDOWS #include +#include +#endif + +#ifdef BENCHMARK_OS_EMSCRIPTEN +#include #endif namespace benchmark { @@ -65,6 +70,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // counter pauses; it does not continue counting, nor does it // reset to zero. return mach_absolute_time(); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // this goes above x86-specific code because old versions of Emscripten + // define __x86_64__, although they have nothing to do with it. 
+ return static_cast(emscripten_get_now() * 1e+6); #elif defined(__i386__) int64_t ret; __asm__ volatile("rdtsc" : "=A"(ret)); @@ -79,7 +88,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm("mftbu %0" : "=r"(tbu0)); asm("mftb %0" : "=r"(tbl)); asm("mftbu %0" : "=r"(tbu1)); - tbl &= -static_cast(tbu0 == tbu1); + tbl &= -static_cast(tbu0 == tbu1); // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) return (tbu1 << 32) | tbl; #elif defined(__sparc__) @@ -99,6 +108,22 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { _asm rdtsc #elif defined(COMPILER_MSVC) return __rdtsc(); +#elif defined(BENCHMARK_OS_NACL) + // Native Client validator on x86/x86-64 allows RDTSC instructions, + // and this case is handled above. Native Client validator on ARM + // rejects MRC instructions (used in the ARM-specific sequence below), + // so we handle it here. Portable Native Client compiles to + // architecture-agnostic bytecode, which doesn't provide any + // cycle counter access mnemonics. + + // Native Client does not provide any API to access cycle counter. + // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday + // because is provides nanosecond resolution (which is noticable at + // least for PNaCl modules running on x86 Mac & Linux). + // Initialize to always return 0 if clock_gettime fails. + struct timespec ts = { 0, 0 }; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #elif defined(__aarch64__) // System timer of ARMv8 runs at a different frequency than the CPU's. // The frequency is fixed, typically in the range 1-50MHz. It can be @@ -108,7 +133,9 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); return virtual_timer_value; #elif defined(__ARM_ARCH) -#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount + // V6 is the earliest arch that has a standard cyclecount + // Native Client validator doesn't allow MRC instructions. +#if (__ARM_ARCH >= 6) uint32_t pmccntr; uint32_t pmuseren; uint32_t pmcntenset; diff --git a/utils/google-benchmark/src/internal_macros.h b/utils/google-benchmark/src/internal_macros.h index e8efcbb39..ab9dd85c1 100644 --- a/utils/google-benchmark/src/internal_macros.h +++ b/utils/google-benchmark/src/internal_macros.h @@ -30,13 +30,26 @@ #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 #elif defined(__APPLE__) -// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just -// that it is an apple system. 
-#define BENCHMARK_OS_MACOSX 1 +#include "TargetConditionals.h" + #if defined(TARGET_OS_MAC) + #define BENCHMARK_OS_MACOSX 1 + #if defined(TARGET_OS_IPHONE) + #define BENCHMARK_OS_IOS 1 + #endif + #endif #elif defined(__FreeBSD__) #define BENCHMARK_OS_FREEBSD 1 #elif defined(__linux__) #define BENCHMARK_OS_LINUX 1 +#elif defined(__native_client__) +#define BENCHMARK_OS_NACL 1 +#elif defined(EMSCRIPTEN) +#define BENCHMARK_OS_EMSCRIPTEN 1 +#endif + +#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ + && !defined(__EXCEPTIONS) +#define BENCHMARK_HAS_NO_EXCEPTIONS #endif #endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/utils/google-benchmark/src/json_reporter.cc b/utils/google-benchmark/src/json_reporter.cc index cea5f9bfa..5a653088e 100644 --- a/utils/google-benchmark/src/json_reporter.cc +++ b/utils/google-benchmark/src/json_reporter.cc @@ -154,10 +154,15 @@ void JSONReporter::PrintRunData(Run const& run) { << indent << FormatKV("items_per_second", RoundDouble(run.items_per_second)); } + for(auto &c : run.counters) { + out << ",\n" + << indent + << FormatKV(c.first, RoundDouble(c.second)); + } if (!run.report_label.empty()) { out << ",\n" << indent << FormatKV("label", run.report_label); } out << '\n'; } -} // end namespace benchmark +} // end namespace benchmark diff --git a/utils/google-benchmark/src/re.h b/utils/google-benchmark/src/re.h index af4a498c9..01e973650 100644 --- a/utils/google-benchmark/src/re.h +++ b/utils/google-benchmark/src/re.h @@ -15,6 +15,15 @@ #ifndef BENCHMARK_RE_H_ #define BENCHMARK_RE_H_ +#include "internal_macros.h" + +// Prefer C regex libraries when compiling w/o exceptions so that we can +// correctly report errors. +#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && defined(HAVE_STD_REGEX) && \ + (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) +#undef HAVE_STD_REGEX +#endif + #if defined(HAVE_STD_REGEX) #include #elif defined(HAVE_GNU_POSIX_REGEX) @@ -62,15 +71,20 @@ class Regex { #if defined(HAVE_STD_REGEX) inline bool Regex::Init(const std::string& spec, std::string* error) { +#ifdef BENCHMARK_HAS_NO_EXCEPTIONS + ((void)error); // suppress unused warning +#else try { +#endif re_ = std::regex(spec, std::regex_constants::extended); - init_ = true; +#ifndef BENCHMARK_HAS_NO_EXCEPTIONS } catch (const std::regex_error& e) { if (error) { *error = e.what(); } } +#endif return init_; } diff --git a/utils/google-benchmark/src/sleep.cc b/utils/google-benchmark/src/sleep.cc index 918abc485..54aa04a42 100644 --- a/utils/google-benchmark/src/sleep.cc +++ b/utils/google-benchmark/src/sleep.cc @@ -15,6 +15,7 @@ #include "sleep.h" #include +#include #include #include "internal_macros.h" @@ -40,7 +41,7 @@ void SleepForMicroseconds(int microseconds) { } void SleepForMilliseconds(int milliseconds) { - SleepForMicroseconds(static_cast(milliseconds) * kNumMicrosPerMilli); + SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); } void SleepForSeconds(double seconds) { diff --git a/utils/google-benchmark/src/sleep.h b/utils/google-benchmark/src/sleep.h index f1e515ca4..f98551afe 100644 --- a/utils/google-benchmark/src/sleep.h +++ b/utils/google-benchmark/src/sleep.h @@ -1,14 +1,12 @@ #ifndef BENCHMARK_SLEEP_H_ #define BENCHMARK_SLEEP_H_ -#include - namespace benchmark { -const int64_t kNumMillisPerSecond = 1000LL; -const int64_t kNumMicrosPerMilli = 1000LL; -const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL; -const int64_t kNumNanosPerMicro = 1000LL; -const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; +const int 
diff --git a/utils/google-benchmark/src/sleep.h b/utils/google-benchmark/src/sleep.h
index f1e515ca4..f98551afe 100644
--- a/utils/google-benchmark/src/sleep.h
+++ b/utils/google-benchmark/src/sleep.h
@@ -1,14 +1,12 @@
 #ifndef BENCHMARK_SLEEP_H_
 #define BENCHMARK_SLEEP_H_
 
-#include <cstdint>
-
 namespace benchmark {
-const int64_t kNumMillisPerSecond = 1000LL;
-const int64_t kNumMicrosPerMilli = 1000LL;
-const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
-const int64_t kNumNanosPerMicro = 1000LL;
-const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
+const int kNumMillisPerSecond = 1000;
+const int kNumMicrosPerMilli = 1000;
+const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
+const int kNumNanosPerMicro = 1000;
+const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
 
 void SleepForMilliseconds(int milliseconds);
 void SleepForSeconds(double seconds);
diff --git a/utils/google-benchmark/src/string_util.cc b/utils/google-benchmark/src/string_util.cc
index 4cefbfba6..cd4e7cfde 100644
--- a/utils/google-benchmark/src/string_util.cc
+++ b/utils/google-benchmark/src/string_util.cc
@@ -45,6 +45,8 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
       std::max(thresh, 1.0 / std::pow(10.0, precision));
   const double big_threshold = adjusted_threshold * one_k;
   const double small_threshold = adjusted_threshold;
+  // Values in ]simple_threshold,small_threshold[ will be printed as-is
+  const double simple_threshold = 0.01;
 
   if (val > big_threshold) {
     // Positive powers
@@ -62,14 +64,16 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
     *exponent = 0;
   } else if (val < small_threshold) {
     // Negative powers
-    double scaled = val;
-    for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
-      scaled *= one_k;
-      if (scaled >= small_threshold) {
-        mantissa_stream << scaled;
-        *exponent = -static_cast<int64_t>(i + 1);
-        *mantissa = mantissa_stream.str();
-        return;
+    if (val < simple_threshold) {
+      double scaled = val;
+      for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
+        scaled *= one_k;
+        if (scaled >= small_threshold) {
+          mantissa_stream << scaled;
+          *exponent = -static_cast<int64_t>(i + 1);
+          *mantissa = mantissa_stream.str();
+          return;
+        }
       }
     }
     mantissa_stream << val;
diff --git a/utils/google-benchmark/src/sysinfo.cc b/utils/google-benchmark/src/sysinfo.cc
index dd1e66306..7feb79e65 100644
--- a/utils/google-benchmark/src/sysinfo.cc
+++ b/utils/google-benchmark/src/sysinfo.cc
@@ -75,7 +75,9 @@ bool ReadIntFromFile(const char* file, long* value) {
   char line[1024];
   char* err;
   memset(line, '\0', sizeof(line));
-  CHECK(read(fd, line, sizeof(line) - 1));
+  ssize_t read_err = read(fd, line, sizeof(line) - 1);
+  ((void)read_err);  // prevent unused warning
+  CHECK(read_err >= 0);
   const long temp_value = strtol(line, &err, 10);
   if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
     *value = temp_value;
@@ -295,8 +297,13 @@ void InitializeSystemInfo() {
       (size == sizeof(cpu_freq))) {
     cpuinfo_cycles_per_second = cpu_freq;
   } else {
+    #if defined BENCHMARK_OS_IOS
+    fprintf(stderr, "CPU frequency cannot be detected. \n");
+    cpuinfo_cycles_per_second = 0;
+    #else
     fprintf(stderr, "%s\n", strerror(errno));
     std::exit(EXIT_FAILURE);
+    #endif
   }
 #else
   // Generic cycles per second counter
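To make the string_util.cc change above concrete: values that fall below the scaling threshold but at or above simple_threshold (0.01) are now printed unscaled instead of picking up a small-SI suffix. A self-contained approximation of that branch, where kOneK and small_threshold are assumptions for the demo rather than the library's exact configuration:

    #include <cstdio>

    // Approximates ToExponentAndMantissa's negative-power branch post-patch.
    void PrintScaled(double val) {
      static const char kSmallSIUnits[] = {'m', 'u', 'n', 'p', 'f', 'a', 'z', 'y'};
      const double kOneK = 1000.0;           // assumed scaling base
      const double small_threshold = 1.0;    // assumed: below this may be scaled
      const double simple_threshold = 0.01;  // matches the patch
      if (val < small_threshold && val < simple_threshold) {
        double scaled = val;
        for (int i = 0; i < 8; ++i) {
          scaled *= kOneK;
          if (scaled >= small_threshold) {
            std::printf("%g%c\n", scaled, kSmallSIUnits[i]);  // 0.005 -> "5m"
            return;
          }
        }
      }
      std::printf("%g\n", val);  // 0.5 -> "0.5" rather than "500m"
    }

With this sketch, PrintScaled(0.5) prints "0.5" where the old loop would have produced "500m", while PrintScaled(0.005) still prints "5m".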
failed"); -#elif defined(BENCHMARK_OS_WINDOWS) +#if defined(BENCHMARK_OS_WINDOWS) HANDLE proc = GetCurrentProcess(); FILETIME creation_time; FILETIME exit_time; @@ -117,21 +114,28 @@ double ProcessCPUUsage() { &user_time)) return MakeTime(kernel_time, user_time); DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. + // Use Emscripten-specific API. Reported CPU time would be exactly the + // same as total time, but this is ok because there aren't long-latency + // syncronous system calls in Emscripten. + return emscripten_get_now() * 1e-3; +#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See + // https://github.com/google/benchmark/pull/292 + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) + return MakeTime(spec); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); #else struct rusage ru; if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); - DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); + DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed"); #endif } double ThreadCPUUsage() { -// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See -// https://github.com/google/benchmark/pull/292 -#if defined(CLOCK_THREAD_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) - struct timespec ts; - if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts); - DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); -#elif defined(BENCHMARK_OS_WINDOWS) +#if defined(BENCHMARK_OS_WINDOWS) HANDLE this_thread = GetCurrentThread(); FILETIME creation_time; FILETIME exit_time; @@ -141,6 +145,8 @@ double ThreadCPUUsage() { &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_MACOSX) + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See + // https://github.com/google/benchmark/pull/292 mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; mach_port_t thread = pthread_mach_thread_np(pthread_self()); @@ -149,6 +155,13 @@ double ThreadCPUUsage() { return MakeTime(info); } DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // Emscripten doesn't support traditional threads + return ProcessCPUUsage(); +#elif defined(CLOCK_THREAD_CPUTIME_ID) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts); + DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); #else #error Per-thread timing is not available on your system. #endif diff --git a/utils/google-benchmark/test/CMakeLists.txt b/utils/google-benchmark/test/CMakeLists.txt index 87245984d..14ba7a6e2 100644 --- a/utils/google-benchmark/test/CMakeLists.txt +++ b/utils/google-benchmark/test/CMakeLists.txt @@ -2,6 +2,25 @@ find_package(Threads REQUIRED) +# NOTE: Some tests use `` to perform the test. Therefore we must +# strip -DNDEBUG from the default CMake flags in DEBUG mode. +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) +if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) + add_definitions( -UNDEBUG ) + add_definitions(-DTEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) + # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines. 
diff --git a/utils/google-benchmark/test/CMakeLists.txt b/utils/google-benchmark/test/CMakeLists.txt
index 87245984d..14ba7a6e2 100644
--- a/utils/google-benchmark/test/CMakeLists.txt
+++ b/utils/google-benchmark/test/CMakeLists.txt
@@ -2,6 +2,25 @@
 
 find_package(Threads REQUIRED)
 
+# NOTE: Some tests use `<cassert>` to perform the test. Therefore we must
+# strip -DNDEBUG from the default CMake flags in DEBUG mode.
+string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
+if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
+  add_definitions( -UNDEBUG )
+  add_definitions(-DTEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS)
+  # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines.
+  foreach (flags_var_to_scrub
+      CMAKE_CXX_FLAGS_RELEASE
+      CMAKE_CXX_FLAGS_RELWITHDEBINFO
+      CMAKE_CXX_FLAGS_MINSIZEREL
+      CMAKE_C_FLAGS_RELEASE
+      CMAKE_C_FLAGS_RELWITHDEBINFO
+      CMAKE_C_FLAGS_MINSIZEREL)
+    string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " "
+      "${flags_var_to_scrub}" "${${flags_var_to_scrub}}")
+  endforeach()
+endif()
+
 # NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise
 # they will break the configuration check.
 if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
diff --git a/utils/google-benchmark/test/benchmark_test.cc b/utils/google-benchmark/test/benchmark_test.cc
index d832f81ae..57731331e 100644
--- a/utils/google-benchmark/test/benchmark_test.cc
+++ b/utils/google-benchmark/test/benchmark_test.cc
@@ -150,7 +150,7 @@ static void BM_LongTest(benchmark::State& state) {
 BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28);
 
 static void BM_ParallelMemset(benchmark::State& state) {
-  int size = state.range(0) / sizeof(int);
+  int size = state.range(0) / static_cast<int>(sizeof(int));
   int thread_size = size / state.threads;
   int from = thread_size * state.thread_index;
   int to = from + thread_size;
@@ -209,11 +209,27 @@ BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"),
                   std::pair<int, double>(42, 3.8));
 
 void BM_non_template_args(benchmark::State& state, int, double) {
-  while (state.KeepRunning()) {
-  }
+  while(state.KeepRunning()) {}
 }
 BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
 
+static void BM_UserCounter(benchmark::State& state) {
+  static const int depth = 1024;
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(CalculatePi(depth));
+  }
+  state.counters["Foo"] = 1;
+  state.counters["Bar"] = 2;
+  state.counters["Baz"] = 3;
+  state.counters["Bat"] = 5;
+#ifdef BENCHMARK_HAS_CXX11
+  state.counters.insert({{"Foo", 2}, {"Bar", 3}, {"Baz", 5}, {"Bat", 6}});
+#endif
+}
+BENCHMARK(BM_UserCounter)->Threads(8);
+BENCHMARK(BM_UserCounter)->ThreadRange(1, 32);
+BENCHMARK(BM_UserCounter)->ThreadPerCpu();
+
 #endif  // __cplusplus >= 201103L
 
 static void BM_DenseThreadRanges(benchmark::State& st) {
diff --git a/utils/google-benchmark/test/cxx03_test.cc b/utils/google-benchmark/test/cxx03_test.cc
index 4f3d0fb6f..a79d964e1 100644
--- a/utils/google-benchmark/test/cxx03_test.cc
+++ b/utils/google-benchmark/test/cxx03_test.cc
@@ -39,4 +39,10 @@ void BM_template1(benchmark::State& state) {
 BENCHMARK_TEMPLATE(BM_template1, long);
 BENCHMARK_TEMPLATE1(BM_template1, int);
 
+void BM_counters(benchmark::State& state) {
+  BM_empty(state);
+  state.counters["Foo"] = 2;
+}
+BENCHMARK(BM_counters);
+
 BENCHMARK_MAIN()
diff --git a/utils/google-benchmark/test/diagnostics_test.cc b/utils/google-benchmark/test/diagnostics_test.cc
index c6c235d0c..1046730b0 100644
--- a/utils/google-benchmark/test/diagnostics_test.cc
+++ b/utils/google-benchmark/test/diagnostics_test.cc
@@ -26,7 +26,7 @@ void TestHandler() {
 }
 
 void try_invalid_pause_resume(benchmark::State& state) {
-#if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS)
+#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS)
   try {
     state.PauseTiming();
     std::abort();
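BM_UserCounter and BM_counters above exercise the new user-counter API: counters set on the State are emitted next to the timing columns by the JSON and CSV reporter changes in this patch. A hedged usage sketch; BM_Compress and the counter name are invented for illustration:

    #include <cstdint>
    #include "benchmark/benchmark_api.h"

    static void BM_Compress(benchmark::State& state) {
      int64_t bytes_seen = 0;
      while (state.KeepRunning()) {
        bytes_seen += 1024;  // stand-in for real work
        benchmark::DoNotOptimize(bytes_seen);
      }
      // Plain assignment, as in BM_UserCounter above.
      state.counters["BytesSeen"] = static_cast<double>(bytes_seen);
    }
    BENCHMARK(BM_Compress);

    BENCHMARK_MAIN()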
diff --git a/utils/google-benchmark/test/options_test.cc b/utils/google-benchmark/test/options_test.cc
index bedb1cc3e..bbbed2883 100644
--- a/utils/google-benchmark/test/options_test.cc
+++ b/utils/google-benchmark/test/options_test.cc
@@ -1,8 +1,12 @@
 #include "benchmark/benchmark_api.h"
-
 #include <chrono>
 #include <thread>
 
+#if defined(NDEBUG)
+#undef NDEBUG
+#endif
+#include <cassert>
+
 void BM_basic(benchmark::State& state) {
   while (state.KeepRunning()) {
   }
@@ -40,4 +44,22 @@ void CustomArgs(benchmark::internal::Benchmark* b) {
 
 BENCHMARK(BM_basic)->Apply(CustomArgs);
 
+void BM_explicit_iteration_count(benchmark::State& st) {
+  // Test that benchmarks specified with an explicit iteration count are
+  // only run once.
+  static bool invoked_before = false;
+  assert(!invoked_before);
+  invoked_before = true;
+
+  // Test that the requested iteration count is respected.
+  assert(st.max_iterations == 42);
+  size_t actual_iterations = 0;
+  while (st.KeepRunning())
+    ++actual_iterations;
+  assert(st.iterations() == st.max_iterations);
+  assert(st.iterations() == 42);
+
+}
+BENCHMARK(BM_explicit_iteration_count)->Iterations(42);
+
 BENCHMARK_MAIN()
diff --git a/utils/google-benchmark/test/output_test_helper.cc b/utils/google-benchmark/test/output_test_helper.cc
index 721d39f92..54c028a67 100644
--- a/utils/google-benchmark/test/output_test_helper.cc
+++ b/utils/google-benchmark/test/output_test_helper.cc
@@ -31,7 +31,7 @@ TestCaseList& GetTestCaseList(TestCaseID ID) {
 
 SubMap& GetSubstitutions() {
   // Don't use 'dec_re' from header because it may not yet be initialized.
-  static std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
+  static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
   static SubMap map = {
       {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"},
       {"%int", "[ ]*[0-9]+"},
@@ -39,13 +39,13 @@ SubMap& GetSubstitutions() {
       {"%time", "[ ]*[0-9]{1,5} ns"},
       {"%console_report", "[ ]*[0-9]{1,5} ns [ ]*[0-9]{1,5} ns [ ]*[0-9]+"},
       {"%console_us_report", "[ ]*[0-9] us [ ]*[0-9] us [ ]*[0-9]+"},
-      {"%csv_report", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,"},
-      {"%csv_us_report", "[0-9]+," + dec_re + "," + dec_re + ",us,,,,,"},
+      {"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"},
+      {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"},
       {"%csv_bytes_report",
-       "[0-9]+," + dec_re + "," + dec_re + ",ns," + dec_re + ",,,,"},
+       "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"},
       {"%csv_items_report",
-       "[0-9]+," + dec_re + "," + dec_re + ",ns,," + dec_re + ",,,"},
-      {"%csv_label_report_begin", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,"},
+       "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,," + safe_dec_re + ",,,"},
+      {"%csv_label_report_begin", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,"},
       {"%csv_label_report_end", ",,"}};
   return map;
 }
diff --git a/utils/google-benchmark/test/reporter_output_test.cc b/utils/google-benchmark/test/reporter_output_test.cc
index 2e6d2b2a0..cb52aec0c 100644
--- a/utils/google-benchmark/test/reporter_output_test.cc
+++ b/utils/google-benchmark/test/reporter_output_test.cc
@@ -9,8 +9,10 @@
 // ---------------------- Testing Prologue Output -------------------------- //
 // ========================================================================= //
 
-ADD_CASES(TC_ConsoleOut, {{"^Benchmark %s Time %s CPU %s Iterations$", MR_Next},
-                          {"^[-]+$", MR_Next}});
+ADD_CASES(TC_ConsoleOut,
+          {{"^[-]+$", MR_Next},
+           {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next},
+           {"^[-]+$", MR_Next}});
 ADD_CASES(TC_CSVOut, {{"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,"
                        "items_per_second,label,error_occurred,error_message"}});
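BM_explicit_iteration_count above documents the new Iterations() option: with an explicit count, the runner skips its usual auto-scaling and performs a single run of exactly that many iterations. A minimal usage sketch (BM_FixedWork is illustrative):

    #include "benchmark/benchmark_api.h"

    static void BM_FixedWork(benchmark::State& state) {
      int x = 0;
      while (state.KeepRunning()) {
        benchmark::DoNotOptimize(++x);  // stand-in for the code under test
      }
    }
    // Exactly 100 iterations, run once, as BM_explicit_iteration_count asserts.
    BENCHMARK(BM_FixedWork)->Iterations(100);

    BENCHMARK_MAIN()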
diff --git a/utils/google-benchmark/tools/compare_bench.py b/utils/google-benchmark/tools/compare_bench.py
index ed0f133e0..d54baaa0e 100644
--- a/utils/google-benchmark/tools/compare_bench.py
+++ b/utils/google-benchmark/tools/compare_bench.py
@@ -3,25 +3,63 @@
 compare_bench.py - Compare two benchmarks or their results and report the
                    difference.
 """
+import argparse
+from argparse import ArgumentParser
 import sys
 import gbench
 from gbench import util, report
+from gbench.util import *
+
+def check_inputs(in1, in2, flags):
+    """
+    Perform checking on the user provided inputs and diagnose any abnormalities
+    """
+    in1_kind, in1_err = classify_input_file(in1)
+    in2_kind, in2_err = classify_input_file(in2)
+    output_file = find_benchmark_flag('--benchmark_out=', flags)
+    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+               "benchmarks causing it to be overwritten") % output_file)
+    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+        print("WARNING: passing --benchmark flags has no effect since both "
+              "inputs are JSON")
+    if output_type is not None and output_type != 'json':
+        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare_bench.py`"
+               " is not supported.") % output_type)
+        sys.exit(1)
+
 
 def main():
+    parser = ArgumentParser(
+        description='compare the results of two benchmarks')
+    parser.add_argument(
+        'test1', metavar='test1', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser.add_argument(
+        'test2', metavar='test2', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    # FIXME this is a dummy argument which will never actually match
+    # any --benchmark flags but it helps generate a better usage message
+    parser.add_argument(
+        'benchmark_options', metavar='benchmark_option', nargs='*',
+        help='Arguments to pass when running benchmark executables'
+    )
+    args, unknown_args = parser.parse_known_args()
     # Parse the command line flags
-    def usage():
-        print('compare_bench.py <test1> <test2> [benchmark options]...')
+    test1 = args.test1[0]
+    test2 = args.test2[0]
+    if args.benchmark_options:
+        print("Unrecognized positional argument arguments: '%s'"
+              % args.benchmark_options)
         exit(1)
-    if '--help' in sys.argv or len(sys.argv) < 3:
-        usage()
-    tests = sys.argv[1:3]
-    bench_opts = sys.argv[3:]
-    bench_opts = list(bench_opts)
+    benchmark_options = unknown_args
+    check_inputs(test1, test2, benchmark_options)
     # Run the benchmarks and report the results
-    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
-    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
+    json1 = gbench.util.run_or_load_benchmark(test1, benchmark_options)
+    json2 = gbench.util.run_or_load_benchmark(test2, benchmark_options)
     output_lines = gbench.report.generate_difference_report(json1, json2)
-    print 'Comparing %s to %s' % (tests[0], tests[1])
+    print('Comparing %s to %s' % (test1, test2))
     for ln in output_lines:
         print(ln)
diff --git a/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json b/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
index da9425e46..37faed46d 100644
--- a/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
+++ b/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json
@@ -41,6 +41,20 @@
       "real_time": 100,
       "cpu_time": 100,
       "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xSlower",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xFaster",
+      "iterations": 1000,
+      "real_time": 10000,
+      "cpu_time": 10000,
+      "time_unit": "ns"
     }
   ]
 }
\ No newline at end of file
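Fixtures like test1_run1.json above mimic what a benchmark binary writes when invoked with --benchmark_out=<file> --benchmark_out_format=json, the same flags check_inputs() inspects. A toy source of such output; kWorkFactor is an invented stand-in for a real speed difference between two builds:

    #include "benchmark/benchmark_api.h"

    static const int kWorkFactor = 1;  // imagine 100 in the slower build

    static void BM_100xSlower(benchmark::State& state) {
      while (state.KeepRunning()) {
        for (int i = 0; i < kWorkFactor * 1000; ++i) benchmark::DoNotOptimize(i);
      }
    }
    BENCHMARK(BM_100xSlower);

    BENCHMARK_MAIN()

Running two such binaries (or their saved JSON) through compare_bench.py yields rows like the BM_100xSlower expectations added to report.py below.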
diff --git a/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json b/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
index d8bc72d29..aed5151d3 100644
--- a/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
+++ b/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json
@@ -41,6 +41,20 @@
       "real_time": 110,
       "cpu_time": 110,
       "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xSlower",
+      "iterations": 1000,
+      "real_time": 10000,
+      "cpu_time": 10000,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
     }
   ]
 }
\ No newline at end of file
diff --git a/utils/google-benchmark/tools/gbench/report.py b/utils/google-benchmark/tools/gbench/report.py
index ac69b9bef..8f1b0fa86 100644
--- a/utils/google-benchmark/tools/gbench/report.py
+++ b/utils/google-benchmark/tools/gbench/report.py
@@ -92,7 +92,7 @@ def generate_difference_report(json1, json2, use_color=True):
                 return BC_WHITE
             else:
                 return BC_CYAN
-        fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}"
+        fmt_str = "{}{:<{}s}{endc}{}{:+9.2f}{endc}{}{:+14.2f}{endc}{:14d}{:14d}"
         tres = calculate_change(bn['real_time'], other_bench['real_time'])
         cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
         output_strs += [color_format(use_color, fmt_str,
@@ -121,19 +121,22 @@ class TestReportDifference(unittest.TestCase):
 
     def test_basic(self):
         expect_lines = [
-            ['BM_SameTimes', '+0.00', '+0.00'],
-            ['BM_2xFaster', '-0.50', '-0.50'],
-            ['BM_2xSlower', '+1.00', '+1.00'],
-            ['BM_10PercentFaster', '-0.10', '-0.10'],
-            ['BM_10PercentSlower', '+0.10', '+0.10']
+            ['BM_SameTimes', '+0.00', '+0.00', '10', '10'],
+            ['BM_2xFaster', '-0.50', '-0.50', '50', '25'],
+            ['BM_2xSlower', '+1.00', '+1.00', '50', '100'],
+            ['BM_10PercentFaster', '-0.10', '-0.10', '100', '90'],
+            ['BM_10PercentSlower', '+0.10', '+0.10', '100', '110'],
+            ['BM_100xSlower', '+99.00', '+99.00', '100', '10000'],
+            ['BM_100xFaster', '-0.99', '-0.99', '10000', '100'],
         ]
         json1, json2 = self.load_results()
-        output_lines = generate_difference_report(json1, json2, use_color=False)
-        print output_lines
+        output_lines_with_header = generate_difference_report(json1, json2, use_color=False)
+        output_lines = output_lines_with_header[2:]
+        print("\n".join(output_lines_with_header))
         self.assertEqual(len(output_lines), len(expect_lines))
         for i in xrange(0, len(output_lines)):
             parts = [x for x in output_lines[i].split(' ') if x]
-            self.assertEqual(len(parts), 3)
+            self.assertEqual(len(parts), 5)
             self.assertEqual(parts, expect_lines[i])
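The new '+99.00' and '-0.99' expectations above pin down what calculate_change() computes: relative change, (new - old) / |old|. Restated as a worked example in C++ (this function is a restatement for clarity, not the library's API):

    #include <cmath>

    // Relative change used for the Real/CPU columns: (new - old) / |old|.
    double CalculateChange(double old_val, double new_val) {
      return (new_val - old_val) / std::fabs(old_val);
    }
    // CalculateChange(100, 10000) == +99.00  -> BM_100xSlower
    // CalculateChange(10000, 100) == -0.99   -> BM_100xFaster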
diff --git a/utils/google-benchmark/tools/gbench/util.py b/utils/google-benchmark/tools/gbench/util.py
index 169b71c2c..07c237727 100644
--- a/utils/google-benchmark/tools/gbench/util.py
+++ b/utils/google-benchmark/tools/gbench/util.py
@@ -20,21 +20,21 @@ def is_executable_file(filename):
     """
     if not os.path.isfile(filename):
         return False
-    with open(filename, 'r') as f:
+    with open(filename, mode='rb') as f:
        magic_bytes = f.read(_num_magic_bytes)
     if sys.platform == 'darwin':
         return magic_bytes in [
-            '\xfe\xed\xfa\xce',  # MH_MAGIC
-            '\xce\xfa\xed\xfe',  # MH_CIGAM
-            '\xfe\xed\xfa\xcf',  # MH_MAGIC_64
-            '\xcf\xfa\xed\xfe',  # MH_CIGAM_64
-            '\xca\xfe\xba\xbe',  # FAT_MAGIC
-            '\xbe\xba\xfe\xca'   # FAT_CIGAM
+            b'\xfe\xed\xfa\xce',  # MH_MAGIC
+            b'\xce\xfa\xed\xfe',  # MH_CIGAM
+            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
+            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
+            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
+            b'\xbe\xba\xfe\xca'   # FAT_CIGAM
         ]
     elif sys.platform.startswith('win'):
-        return magic_bytes == 'MZ'
+        return magic_bytes == b'MZ'
     else:
-        return magic_bytes == '\x7FELF'
+        return magic_bytes == b'\x7FELF'
 
 
 def is_json_file(filename):
@@ -68,7 +68,7 @@ def classify_input_file(filename):
     elif is_json_file(filename):
         ftype = IT_JSON
     else:
-        err_msg = "'%s' does not name a valid benchmark executable or JSON file"
+        err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
     return ftype, err_msg
@@ -80,10 +80,30 @@ def check_input_file(filename):
     """
     ftype, msg = classify_input_file(filename)
     if ftype == IT_Invalid:
-        print "Invalid input file: %s" % msg
+        print("Invalid input file: %s" % msg)
         sys.exit(1)
     return ftype
 
+
+def find_benchmark_flag(prefix, benchmark_flags):
+    """
+    Search the specified list of flags for a flag matching `<prefix>` and
+    if it is found return the arg it specifies. If specified more than once the
+    last value is returned. If the flag is not found None is returned.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    result = None
+    for f in benchmark_flags:
+        if f.startswith(prefix):
+            result = f[len(prefix):]
+    return result
+
+
+def remove_benchmark_flags(prefix, benchmark_flags):
+    """
+    Return a new list containing the specified benchmark_flags except those
+    with the specified prefix.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    return [f for f in benchmark_flags if not f.startswith(prefix)]
 
 def load_benchmark_results(fname):
     """
@@ -101,16 +121,25 @@ def run_benchmark(exe_name, benchmark_flags):
     real time console output.
     RETURNS: A JSON object representing the benchmark output
     """
-    thandle, tname = tempfile.mkstemp()
-    os.close(thandle)
+    output_name = find_benchmark_flag('--benchmark_out=',
+                                      benchmark_flags)
+    is_temp_output = False
+    if output_name is None:
+        is_temp_output = True
+        thandle, output_name = tempfile.mkstemp()
+        os.close(thandle)
+        benchmark_flags = list(benchmark_flags) + \
+            ['--benchmark_out=%s' % output_name]
+
     cmd = [exe_name] + benchmark_flags
     print("RUNNING: %s" % ' '.join(cmd))
-    exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
+    exitCode = subprocess.call(cmd)
     if exitCode != 0:
         print('TEST FAILED...')
         sys.exit(exitCode)
-    json_res = load_benchmark_results(tname)
-    os.unlink(tname)
+    json_res = load_benchmark_results(output_name)
+    if is_temp_output:
+        os.unlink(output_name)
     return json_res
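is_executable_file() above now opens the candidate in binary mode and compares bytes objects, so the magic-number sniffing behaves the same under Python 2 and 3. The Linux case, restated as a standalone C++ sketch (LooksLikeElf is illustrative; the Mach-O and PE checks are omitted):

    #include <cstdio>
    #include <cstring>

    bool LooksLikeElf(const char* path) {
      unsigned char magic[4] = {0, 0, 0, 0};
      std::FILE* f = std::fopen(path, "rb");  // binary mode, as in the patch
      if (!f) return false;
      const size_t n = std::fread(magic, 1, sizeof(magic), f);
      std::fclose(f);
      // 0x7F 'E' 'L' 'F' -- the b'\x7FELF' comparison above.
      return n == 4 && std::memcmp(magic, "\x7F" "ELF", 4) == 0;
    }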