From a0da9a768503f17a6b3a4523aa52ac8c1440be3a Mon Sep 17 00:00:00 2001 From: mipengwei Date: Wed, 20 Jul 2022 09:51:00 +0800 Subject: [PATCH] Signed-off-by:mipengwei --- .clang-tidy | 7 + .github/.libcxx-setup.sh | 24 + .github/workflows/bazel.yml | 30 + .../workflows/build-and-test-perfcounters.yml | 97 ++ .github/workflows/build-and-test.yml | 155 ++- .github/workflows/clang-format-lint.yml | 17 + .github/workflows/clang-tidy.yml | 38 + .github/workflows/doxygen.yml | 26 + .github/workflows/pylint.yml | 4 +- .github/workflows/sanitizer.yml | 92 ++ .github/workflows/test_bindings.yml | 4 +- .github/workflows/wheels.yml | 133 ++ .gitignore | 1 + .travis.yml | 25 +- AUTHORS | 6 + BUILD.bazel | 12 +- BUILD.gn | 1 + CMakeLists.txt | 80 +- CONTRIBUTORS | 8 + OAT.xml | 2 +- README.OpenSource | 6 +- README.md | 1144 +--------------- WORKSPACE | 26 +- _config.yml | 3 +- bindings/python/google_benchmark/__init__.py | 6 +- bindings/python/google_benchmark/benchmark.cc | 9 +- bindings/python/google_benchmark/example.py | 2 +- cmake/AddCXXCompilerFlag.cmake | 12 +- cmake/Config.cmake.in | 6 + cmake/GetGitVersion.cmake | 22 +- cmake/GoogleTest.cmake | 15 +- cmake/GoogleTest.cmake.in | 5 +- cmake/benchmark.pc.in | 4 +- conan/CMakeLists.txt | 7 - conan/test_package/CMakeLists.txt | 10 - conan/test_package/conanfile.py | 19 - conan/test_package/test_package.cpp | 18 - conanfile.py | 79 -- docs/_config.yml | 2 +- docs/dependencies.md | 19 + docs/index.md | 10 + docs/perf_counters.md | 34 + docs/platform_specific_build_instructions.md | 48 + docs/random_interleaving.md | 13 + docs/releasing.md | 23 +- docs/user_guide.md | 1200 +++++++++++++++++ include/benchmark/benchmark.h | 408 ++++-- requirements.txt | 3 + setup.py | 3 + src/CMakeLists.txt | 91 +- src/benchmark.cc | 281 ++-- src/benchmark_api_internal.cc | 111 +- src/benchmark_api_internal.h | 77 +- src/benchmark_register.cc | 185 ++- src/benchmark_register.h | 27 +- src/benchmark_runner.cc | 477 ++++--- src/benchmark_runner.h | 75 +- src/check.h | 44 +- src/colorprint.cc | 8 +- src/commandlineflags.cc | 57 + src/commandlineflags.h | 73 +- src/complexity.cc | 24 +- src/console_reporter.cc | 53 +- src/csv_reporter.cc | 18 +- src/cycleclock.h | 27 +- src/internal_macros.h | 10 +- src/json_reporter.cc | 114 +- src/log.h | 2 +- src/mutex.h | 44 +- src/perf_counters.cc | 132 ++ src/perf_counters.h | 172 +++ src/re.h | 2 +- src/reporter.cc | 19 +- src/sleep.cc | 17 +- src/statistics.cc | 41 +- src/statistics.h | 1 + src/string_util.cc | 36 +- src/string_util.h | 14 +- src/sysinfo.cc | 109 +- src/thread_manager.h | 1 - src/thread_timer.h | 8 +- src/timers.cc | 51 +- test/BUILD | 2 + test/CMakeLists.txt | 14 + test/args_product_test.cc | 10 +- test/basic_test.cc | 77 +- test/benchmark_gtest.cc | 37 + test/benchmark_random_interleaving_gtest.cc | 127 ++ test/benchmark_setup_teardown_test.cc | 157 +++ test/benchmark_test.cc | 28 +- test/clobber_memory_assembly_test.cc | 1 - test/commandlineflags_gtest.cc | 33 +- test/complexity_test.cc | 79 +- test/cxx03_test.cc | 7 +- test/diagnostics_test.cc | 6 +- test/display_aggregates_only_test.cc | 10 +- test/donotoptimize_assembly_test.cc | 8 +- test/donotoptimize_test.cc | 13 +- test/filter_test.cc | 28 +- test/fixture_test.cc | 28 +- test/internal_threading_test.cc | 1 + test/map_test.cc | 8 +- test/memory_manager_test.cc | 8 +- test/multiple_ranges_test.cc | 10 +- test/options_test.cc | 8 +- test/output_test.h | 26 +- test/output_test_helper.cc | 96 +- test/perf_counters_gtest.cc | 145 ++ test/perf_counters_test.cc | 27 + test/register_benchmark_test.cc | 10 +- test/repetitions_test.cc | 214 +++ test/report_aggregates_only_test.cc | 10 +- test/reporter_output_test.cc | 400 +++++- test/skip_with_error_test.cc | 23 +- test/spec_arg_test.cc | 95 ++ test/statistics_gtest.cc | 7 + test/string_util_gtest.cc | 157 ++- test/templated_fixture_test.cc | 4 +- test/user_counters_tabular_test.cc | 318 ++++- test/user_counters_test.cc | 50 +- test/user_counters_thousands_test.cc | 13 + tools/BUILD.bazel | 19 + tools/compare.py | 27 +- tools/gbench/Inputs/test4_run.json | 96 ++ tools/gbench/Inputs/test4_run0.json | 21 + tools/gbench/Inputs/test4_run1.json | 21 + tools/gbench/report.py | 895 ++++++++++-- tools/gbench/util.py | 18 + 128 files changed, 6988 insertions(+), 2723 deletions(-) create mode 100644 .clang-tidy create mode 100644 .github/.libcxx-setup.sh create mode 100644 .github/workflows/bazel.yml create mode 100644 .github/workflows/build-and-test-perfcounters.yml create mode 100644 .github/workflows/clang-format-lint.yml create mode 100644 .github/workflows/clang-tidy.yml create mode 100644 .github/workflows/doxygen.yml create mode 100644 .github/workflows/sanitizer.yml create mode 100644 .github/workflows/wheels.yml delete mode 100644 conan/CMakeLists.txt delete mode 100644 conan/test_package/CMakeLists.txt delete mode 100644 conan/test_package/conanfile.py delete mode 100644 conan/test_package/test_package.cpp delete mode 100644 conanfile.py create mode 100644 docs/dependencies.md create mode 100644 docs/index.md create mode 100644 docs/perf_counters.md create mode 100644 docs/platform_specific_build_instructions.md create mode 100644 docs/random_interleaving.md create mode 100644 docs/user_guide.md create mode 100644 requirements.txt create mode 100644 src/perf_counters.cc create mode 100644 src/perf_counters.h create mode 100644 test/benchmark_random_interleaving_gtest.cc create mode 100644 test/benchmark_setup_teardown_test.cc create mode 100644 test/perf_counters_gtest.cc create mode 100644 test/perf_counters_test.cc create mode 100644 test/repetitions_test.cc create mode 100644 test/spec_arg_test.cc create mode 100644 tools/BUILD.bazel create mode 100644 tools/gbench/Inputs/test4_run.json create mode 100644 tools/gbench/Inputs/test4_run0.json create mode 100644 tools/gbench/Inputs/test4_run1.json diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..56938a5 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,7 @@ +--- +Checks: 'clang-analyzer-*,readability-redundant-*,performance-*' +WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*' +HeaderFilterRegex: '.*' +AnalyzeTemporaryDtors: false +FormatStyle: none +User: user diff --git a/.github/.libcxx-setup.sh b/.github/.libcxx-setup.sh new file mode 100644 index 0000000..5600840 --- /dev/null +++ b/.github/.libcxx-setup.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Checkout LLVM sources +git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project + +# Setup libc++ options +if [ -z "$BUILD_32_BITS" ]; then + export BUILD_32_BITS=OFF && echo disabling 32 bit build +fi + +# Build and install libc++ (Use unstable ABI for better sanitizer coverage) +cd ./llvm-project +cmake -DCMAKE_C_COMPILER=${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=${COMPILER} \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DLIBCXX_ABI_UNSTABLE=OFF \ + -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ + -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ + -DLLVM_ENABLE_PROJECTS='libcxx;libcxxabi' \ + -S llvm -B llvm-build -G "Unix Makefiles" +make -C llvm-build -j3 cxx cxxabi +sudo make -C llvm-build install-cxx install-cxxabi +cd .. diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml new file mode 100644 index 0000000..a53661b --- /dev/null +++ b/.github/workflows/bazel.yml @@ -0,0 +1,30 @@ +name: bazel + +on: + push: {} + pull_request: {} + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: mount bazel cache + uses: actions/cache@v2.0.0 + env: + cache-name: bazel-cache + with: + path: "~/.cache/bazel" + key: ${{ env.cache-name }}-${{ runner.os }}-${{ github.ref }} + restore-keys: | + ${{ env.cache-name }}-${{ runner.os }}-main + + - name: build + run: | + bazel build //:benchmark //:benchmark_main //test/... + + - name: test + run: | + bazel test --test_output=all //test/... diff --git a/.github/workflows/build-and-test-perfcounters.yml b/.github/workflows/build-and-test-perfcounters.yml new file mode 100644 index 0000000..bb5a43f --- /dev/null +++ b/.github/workflows/build-and-test-perfcounters.yml @@ -0,0 +1,97 @@ +name: build-and-test-perfcounters + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + job: + # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. + name: ${{ matrix.os }}.${{ matrix.build_type }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, ubuntu-20.04] + build_type: ['Release', 'Debug'] + steps: + - uses: actions/checkout@v2 + + - name: install libpfm + run: sudo apt -y install libpfm4-dev + + - name: setup cmake + uses: jwlawson/actions-setup-cmake@v1.9 + with: + cmake-version: '3.5.1' + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_LIBPFM=1 + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + # Skip testing, for now. It seems perf_event_open does not succeed on the + # hosting machine, very likely a permissions issue. + # TODO(mtrofin): Enable test. + # - name: test + # shell: bash + # working-directory: ${{ runner.workspace }}/_build + # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure + + ubuntu-16_04: + name: ubuntu-16.04.${{ matrix.build_type }} + runs-on: [ubuntu-latest] + strategy: + fail-fast: false + matrix: + build_type: ['Release', 'Debug'] + container: ubuntu:16.04 + steps: + - uses: actions/checkout@v2 + + - name: install required bits + run: | + apt update + apt -y install clang cmake g++ git + + - name: install libpfm + run: apt -y install libpfm4-dev + + - name: create build environment + run: cmake -E make_directory $GITHUB_WORKSPACE/_build + + - name: configure cmake + shell: bash + working-directory: ${{ github.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_LIBPFM=1 + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ github.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + # Skip testing, for now. It seems perf_event_open does not succeed on the + # hosting machine, very likely a permissions issue. + # TODO(mtrofin): Enable test. + # - name: test + # shell: bash + # working-directory: ${{ runner.workspace }}/_build + # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f0f0626..6ff6f49 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -1,38 +1,149 @@ name: build-and-test on: - push: - branches: [ master ] - pull_request: - branches: [ master ] + push: {} + pull_request: {} jobs: + # TODO: add 32-bit builds (g++ and clang++) for ubuntu + # (requires g++-multilib and libc6:i386) + # TODO: add coverage build (requires lcov) + # TODO: add clang + libc++ builds for ubuntu job: - # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. - name: ${{ matrix.os }}.${{ matrix.build_type }} + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.compiler }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-latest, ubuntu-20.04, macos-latest] build_type: ['Release', 'Debug'] + compiler: [g++, clang++] + include: + - displayTargetName: windows-latest-release + os: windows-latest + build_type: 'Release' + - displayTargetName: windows-latest-debug + os: windows-latest + build_type: 'Debug' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v2 - - name: create build environment - run: cmake -E make_directory ${{ runner.workspace }}/_build + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build - - name: configure cmake - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + - name: configure cmake + env: + CXX: ${{ matrix.compiler }} + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - name: build - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake --build . --config ${{ matrix.build_type }} + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} - - name: test - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} + - name: test + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV + + ubuntu-16_04: + name: ubuntu-16.04.${{ matrix.build_type }}.${{ matrix.compiler }} + runs-on: [ubuntu-latest] + strategy: + fail-fast: false + matrix: + build_type: ['Release', 'Debug'] + compiler: [g++, clang++] + container: ubuntu:16.04 + steps: + - uses: actions/checkout@v2 + + - name: install required bits + run: | + apt update + apt -y install clang cmake g++ git + + - name: create build environment + run: cmake -E make_directory $GITHUB_WORKSPACE/_build + + - name: configure cmake + env: + CXX: ${{ matrix.compiler }} + shell: bash + working-directory: ${{ github.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ github.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + - name: test + shell: bash + working-directory: ${{ github.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV + + ubuntu-14_04: + name: ubuntu-14.04.${{ matrix.build_type }}.${{ matrix.compiler }} + runs-on: [ubuntu-latest] + strategy: + fail-fast: false + matrix: + build_type: ['Release', 'Debug'] + compiler: [g++-4.8, clang++-3.6] + include: + - compiler: g++-6 + build_type: 'Debug' + run_tests: true + - compiler: g++-6 + build_type: 'Release' + run_tests: true + container: ubuntu:14.04 + steps: + - uses: actions/checkout@v2 + + - name: install required bits + run: | + sudo apt update + sudo apt -y install clang-3.6 cmake3 g++-4.8 git + + - name: install other bits + if: ${{ matrix.compiler }} == g++-6 + run: | + sudo apt -y install software-properties-common + sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test" + sudo apt update + sudo apt -y install g++-6 + + - name: create build environment + run: cmake -E make_directory $GITHUB_WORKSPACE/_build + + - name: configure cmake + env: + CXX: ${{ matrix.compiler }} + shell: bash + working-directory: ${{ github.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_TESTING=${{ matrix.run_tests }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=${{ matrix.run_tests }} + + - name: build + shell: bash + working-directory: ${{ github.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + - name: test + if: ${{ matrix.run_tests }} + shell: bash + working-directory: ${{ github.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV diff --git a/.github/workflows/clang-format-lint.yml b/.github/workflows/clang-format-lint.yml new file mode 100644 index 0000000..75775c7 --- /dev/null +++ b/.github/workflows/clang-format-lint.yml @@ -0,0 +1,17 @@ +name: clang-format-lint +on: + push: {} + pull_request: {} + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: DoozyX/clang-format-lint-action@v0.13 + with: + source: './include/benchmark ./src ./test' + extensions: 'h,cc' + clangFormatVersion: 12 + style: Google diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml new file mode 100644 index 0000000..978171d --- /dev/null +++ b/.github/workflows/clang-tidy.yml @@ -0,0 +1,38 @@ +name: clang-tidy + +on: + push: {} + pull_request: {} + +jobs: + job: + name: run-clang-tidy + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + + - name: install clang-tidy + run: sudo apt update && sudo apt -y install clang-tidy + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF + -DBENCHMARK_ENABLE_LIBPFM=OFF + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_C_COMPILER=clang + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + -DGTEST_COMPILE_COMMANDS=OFF + + - name: run + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: run-clang-tidy diff --git a/.github/workflows/doxygen.yml b/.github/workflows/doxygen.yml new file mode 100644 index 0000000..dc55011 --- /dev/null +++ b/.github/workflows/doxygen.yml @@ -0,0 +1,26 @@ +name: doxygen + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build-and-deploy: + name: Build HTML documentation + runs-on: ubuntu-latest + steps: + - name: Fetching sources + uses: actions/checkout@v2 + - name: Installing build dependencies + run: | + sudo apt update + sudo apt install cmake doxygen gcc git + - name: Creating build directory + run: | + mkdir build + - name: Building HTML documentation with Doxygen + run: | + cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON + cmake --build build --target benchmark_doxygen diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index c869674..0f73a58 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -2,9 +2,9 @@ name: pylint on: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] jobs: pylint: diff --git a/.github/workflows/sanitizer.yml b/.github/workflows/sanitizer.yml new file mode 100644 index 0000000..bbfc782 --- /dev/null +++ b/.github/workflows/sanitizer.yml @@ -0,0 +1,92 @@ +name: sanitizer + +on: + push: {} + pull_request: {} + +env: + UBSAN_OPTIONS: "print_stacktrace=1" + +jobs: + job: + name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}.${{ matrix.compiler }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + build_type: ['Debug', 'RelWithDebInfo'] + sanitizer: ['asan', 'ubsan', 'tsan'] + compiler: ['clang', 'gcc'] + # TODO: add 'msan' above. currently failing and needs investigation. + steps: + - uses: actions/checkout@v2 + + - name: configure msan env + if: matrix.sanitizer == 'msan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV + + - name: configure ubsan env + if: matrix.sanitizer == 'ubsan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV + + - name: configure asan env + if: matrix.sanitizer == 'asan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV + + - name: configure tsan env + if: matrix.sanitizer == 'tsan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV + + - name: configure clang + if: matrix.compiler == 'clang' + run: | + echo "CC=clang" >> $GITHUB_ENV + echo "CXX=clang++" >> $GITHUB_ENV + + - name: configure gcc + if: matrix.compiler == 'gcc' + run: | + sudo apt update && sudo apt -y install gcc-10 g++-10 + echo "CC=gcc-10" >> $GITHUB_ENV + echo "CXX=g++-10" >> $GITHUB_ENV + + - name: install llvm stuff + if: matrix.compiler == 'clang' + run: | + "${GITHUB_WORKSPACE}/.github/.libcxx-setup.sh" + echo "EXTRA_CXX_FLAGS=\"-stdlib=libc++\"" >> $GITHUB_ENV + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF + -DBENCHMARK_ENABLE_LIBPFM=OFF + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_C_COMPILER=${{ env.CC }} + -DCMAKE_CXX_COMPILER=${{ env.CXX }} + -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}" + -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}" + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + - name: test + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV diff --git a/.github/workflows/test_bindings.yml b/.github/workflows/test_bindings.yml index 273d7f9..4a580eb 100644 --- a/.github/workflows/test_bindings.yml +++ b/.github/workflows/test_bindings.yml @@ -2,9 +2,9 @@ name: test-bindings on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] jobs: python_bindings: diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000..c918e2e --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,133 @@ +name: Build and upload Python wheels + +on: + workflow_dispatch: + release: + types: + - published + +jobs: + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - name: Check out repo + uses: actions/checkout@v2 + + - name: Install Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Build and check sdist + run: | + python setup.py sdist + - name: Upload sdist + uses: actions/upload-artifact@v2 + with: + name: dist + path: dist/*.tar.gz + + build_linux: + name: Build google-benchmark manylinux wheels + runs-on: ubuntu-latest + + steps: + - name: Check out Google Benchmark + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + # TODO: Bazel does not seem to work in an emulated Docker environment, see + # https://github.com/bazelbuild/bazel/issues/11379 +# - name: Set up QEMU +# uses: docker/setup-qemu-action@v1 +# with: +# platforms: all + + - name: Build Python wheels on ubuntu-latest + env: + CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' + CIBW_SKIP: "*-musllinux_*" + # Bazel repo only exists on CentOS 7 for x86 and ppc, so no manylinux2010 + # TODO: Build ppc64le, aarch64 using some other trick + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + CIBW_ARCHS_LINUX: x86_64 + CIBW_BEFORE_ALL: > + curl -O --retry-delay 5 --retry 5 https://copr.fedorainfracloud.org/coprs/vbatts/bazel/repo/epel-7/vbatts-bazel-epel-7.repo && + cp vbatts-bazel-epel-7.repo /etc/yum.repos.d/bazel.repo && + yum install -y bazel4 + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + run: | + pip install cibuildwheel + python -m cibuildwheel --output-dir wheelhouse + + - name: Upload Linux wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: wheelhouse/*.whl + + build_macos: + name: Build google-benchmark macOS wheels + runs-on: macos-latest + + steps: + - name: Check out Google Benchmark + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Build Python wheels on macOS + env: + CIBW_ARCHS_MACOS: "x86_64 arm64" + CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' + # ARM64 requires Python 3.8 minimum + CIBW_SKIP: 'cp37-*-arm64' + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + CIBW_TEST_SKIP: "*_arm64" + run: | + pip install cibuildwheel + python -m cibuildwheel --output-dir wheelhouse + + - name: Upload macOS wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: wheelhouse/*.whl + + build_windows: + name: Build google-benchmark wheels on Windows + runs-on: windows-latest + + steps: + - name: Check out Google Benchmark + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Build Python wheels on Windows + env: + CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' + CIBW_ARCHS_WINDOWS: AMD64 + # otherwise, pip crashes the job by trying to remove an in-use bazel DLL + PIP_NO_CLEAN: true + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + run: | + pip install cibuildwheel + python -m cibuildwheel --output-dir wheelhouse + + - name: Upload wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: wheelhouse/*.whl \ No newline at end of file diff --git a/.gitignore b/.gitignore index be55d77..704f56c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ *.swp *.pyc __pycache__ +.DS_Store # lcov *.lcov diff --git a/.travis.yml b/.travis.yml index 36e343d..8cfed3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,10 +10,6 @@ matrix: packages: - lcov env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage - - compiler: gcc - env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug - - compiler: gcc - env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release - compiler: gcc addons: apt: @@ -44,10 +40,6 @@ matrix: - COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug - ENABLE_SANITIZER=1 - EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold" - - compiler: clang - env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug - - compiler: clang - env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release # Clang w/ libc++ - compiler: clang dist: xenial @@ -146,16 +138,6 @@ matrix: - ENABLE_SANITIZER=1 - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" - EXTRA_CXX_FLAGS="-stdlib=libc++" - - os: osx - osx_image: xcode8.3 - compiler: clang - env: - - COMPILER=clang++ BUILD_TYPE=Debug - - os: osx - osx_image: xcode8.3 - compiler: clang - env: - - COMPILER=clang++ BUILD_TYPE=Release - os: osx osx_image: xcode8.3 compiler: clang @@ -164,15 +146,10 @@ matrix: - BUILD_TYPE=Release - BUILD_32_BITS=ON - EXTRA_FLAGS="-m32" - - os: osx - osx_image: xcode9.4 - compiler: gcc - env: - - COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug before_script: - if [ -n "${LIBCXX_BUILD}" ]; then - source .travis-libcxx-setup.sh; + source .libcxx-setup.sh; fi - if [ -n "${ENABLE_SANITIZER}" ]; then export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF"; diff --git a/AUTHORS b/AUTHORS index e353b53..2b8072e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -21,6 +21,8 @@ David Coeurjolly Deniz Evrenci Dirac Research Dominik Czarnota +Dominik Korman +Donald Aingworth Eric Backus Eric Fiselier Eugene Zhuk @@ -43,6 +45,7 @@ Matt Clarkson Maxim Vafin MongoDB Inc. Nick Hutchinson +Norman Heino Oleksandr Sochka Ori Livneh Paul Redmond @@ -50,8 +53,11 @@ Radoslav Yovchev Roman Lebedev Sayan Bhattacharjee Shuo Chen +Staffan Tjernstrom Steinar H. Gunderson Stripe, Inc. +Tobias Schmidt Yixuan Qiu Yusuke Suzuki Zbigniew Skowron +Min-Yih Hsu diff --git a/BUILD.bazel b/BUILD.bazel index eb35b62..904c691 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,9 +1,17 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") - licenses(["notice"]) +config_setting( + name = "qnx", + constraint_values = ["@platforms//os:qnx"], + values = { + "cpu": "x64_qnx", + }, + visibility = [":__subpackages__"], +) + config_setting( name = "windows", + constraint_values = ["@platforms//os:windows"], values = { "cpu": "x64_windows", }, diff --git a/BUILD.gn b/BUILD.gn index 4d44bb4..db8f969 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -41,6 +41,7 @@ ohos_static_library("benchmark") { "src/complexity.cc", "src/console_reporter.cc", "src/counter.cc", + "src/perf_counters.cc", "src/csv_reporter.cc", "src/json_reporter.cc", "src/reporter.cc", diff --git a/CMakeLists.txt b/CMakeLists.txt index a157666..b8852e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,18 +13,31 @@ foreach(p endif() endforeach() -project (benchmark CXX) +project (benchmark VERSION 1.6.1 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) +option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON) +option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF) + +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") + # PGC++ maybe reporting false positives. + set(BENCHMARK_ENABLE_WERROR OFF) +endif() +if(BENCHMARK_FORCE_WERROR) + set(BENCHMARK_ENABLE_WERROR ON) +endif(BENCHMARK_FORCE_WERROR) + if(NOT MSVC) option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) else() set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) endif() option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON) +option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF) +option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON) # Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which # may require downloading the source code. @@ -33,8 +46,22 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi # This option can be used to disable building and running unit tests which depend on gtest # in cases where it is not possible to build or find a valid version of gtest. option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) +option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON) + +option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +if(MSVC) + # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and + # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the + # undocumented, but working variable. + # See https://gitlab.kitware.com/cmake/cmake/-/issues/15170 + set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID}) + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM") + set(CMAKE_CROSSCOMPILING TRUE) + endif() +endif() + set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF) function(should_enable_assembly_tests) if(CMAKE_BUILD_TYPE) @@ -81,8 +108,14 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(GetGitVersion) get_git_version(GIT_VERSION) +# If no git version can be determined, use the version +# from the project() command +if ("${GIT_VERSION}" STREQUAL "0.0.0") + set(VERSION "${benchmark_VERSION}") +else() + set(VERSION "${GIT_VERSION}") +endif() # Tell the user what versions we are using -string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION}) message(STATUS "Version: ${VERSION}") # The version of the libraries @@ -93,6 +126,9 @@ string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) include(CXXFeatureCheck) +include(CheckLibraryExists) + +check_library_exists(rt shm_open "" HAVE_LIB_RT) if (BENCHMARK_BUILD_32_BITS) add_required_cxx_compiler_flag(-m32) @@ -141,12 +177,17 @@ else() add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) - # Disabled until googletest (gmock) stops emitting variadic macro warnings - #add_cxx_compiler_flag(-pedantic) - #add_cxx_compiler_flag(-pedantic-errors) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Werror RELEASE) + add_cxx_compiler_flag(-Werror RELWITHDEBINFO) + add_cxx_compiler_flag(-Werror MINSIZEREL) + endif() + if (NOT BENCHMARK_ENABLE_TESTING) + # Disable warning when compiling tests as gtest does not use 'override'. + add_cxx_compiler_flag(-Wsuggest-override) + endif() + add_cxx_compiler_flag(-pedantic) + add_cxx_compiler_flag(-pedantic-errors) add_cxx_compiler_flag(-Wshorten-64-to-32) add_cxx_compiler_flag(-fstrict-aliasing) # Disable warnings regarding deprecated parts of the library while building @@ -159,9 +200,11 @@ else() add_cxx_compiler_flag(-wd1786) endif() # Disable deprecation warnings for release builds (when -Werror is enabled). - add_cxx_compiler_flag(-Wno-deprecated RELEASE) - add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) - add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Wno-deprecated RELEASE) + add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) + add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-fno-exceptions) endif() @@ -194,6 +237,7 @@ else() # Link time optimisation if (BENCHMARK_ENABLE_LTO) add_cxx_compiler_flag(-flto) + add_cxx_compiler_flag(-Wno-lto-type-mismatch) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") find_program(GCC_AR gcc-ar) if (GCC_AR) @@ -269,6 +313,10 @@ cxx_feature_check(STEADY_CLOCK) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +if (BENCHMARK_ENABLE_LIBPFM) + find_package(PFM) +endif() + # Set up directories include_directories(${PROJECT_SOURCE_DIR}/include) @@ -280,7 +328,15 @@ if (BENCHMARK_ENABLE_TESTING) if (BENCHMARK_ENABLE_GTEST_TESTS AND NOT (TARGET gtest AND TARGET gtest_main AND TARGET gmock AND TARGET gmock_main)) - include(GoogleTest) + if (BENCHMARK_USE_BUNDLED_GTEST) + include(GoogleTest) + else() + find_package(GTest CONFIG REQUIRED) + add_library(gtest ALIAS GTest::gtest) + add_library(gtest_main ALIAS GTest::gtest_main) + add_library(gmock ALIAS GTest::gmock) + add_library(gmock_main ALIAS GTest::gmock_main) + endif() endif() add_subdirectory(test) endif() diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6beed71..651fbea 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -22,6 +22,7 @@ # # Please keep the list sorted. +Abhina Sreeskantharajan Albert Pretorius Alex Steele Andriy Berestovskyy @@ -37,10 +38,13 @@ David Coeurjolly Deniz Evrenci Dominic Hamon Dominik Czarnota +Dominik Korman +Donald Aingworth Eric Backus Eric Fiselier Eugene Zhuk Evgeny Safronov +Fanbo Meng Federico Ficarelli Felix Homann Geoffrey Martin-Noble @@ -60,6 +64,7 @@ Lei Xu Matt Clarkson Maxim Vafin Nick Hutchinson +Norman Heino Oleksandr Sochka Ori Livneh Pascal Leroy @@ -72,8 +77,11 @@ Robert Guo Roman Lebedev Sayan Bhattacharjee Shuo Chen +Steven Wan +Tobias Schmidt Tobias Ulvgård Tom Madams Yixuan Qiu Yusuke Suzuki Zbigniew Skowron +Min-Yih Hsu diff --git a/OAT.xml b/OAT.xml index d94b409..d011c84 100644 --- a/OAT.xml +++ b/OAT.xml @@ -38,7 +38,7 @@ - + diff --git a/README.OpenSource b/README.OpenSource index f4b100e..5b8bc54 100644 --- a/README.OpenSource +++ b/README.OpenSource @@ -3,9 +3,9 @@ "Name": "benchmark", "License": "Apache License V2.0", "License File": "LICENSE", - "Version Number": "1.5.2", - "Owner": "renxiang11@huawei.com", - "Upstream URL": "https://github.com/google/benchmark/releases/tag/v1.5.2", + "Version Number": "1.6.1", + "Owner": "mipengwei@huawei.com", + "Upstream URL": "https://github.com/google/benchmark/releases/tag/v1.6.1", "Description": "A microbenchmark support library" } ] diff --git a/README.md b/README.md index 1471987..7b81d96 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,14 @@ # Benchmark +[![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test) +[![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) +[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint) +[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) + [![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) -[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) + A library to benchmark code snippets, similar to unit tests. Example: ```c++ @@ -22,18 +27,21 @@ BENCHMARK(BM_SomeFunction); BENCHMARK_MAIN(); ``` +## Getting Started + To get started, see [Requirements](#requirements) and [Installation](#installation). See [Usage](#usage) for a full example and the -[User Guide](#user-guide) for a more comprehensive feature overview. +[User Guide](docs/user_guide.md) for a more comprehensive feature overview. -It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) +It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) as some of the structural aspects of the APIs are similar. -### Resources +## Resources [Discussion group](https://groups.google.com/d/forum/benchmark-discuss) -IRC channel: [freenode](https://freenode.net) #googlebenchmark +IRC channels: +* [libera](https://libera.chat) #benchmark [Additional Tooling Documentation](docs/tools.md) @@ -51,27 +59,25 @@ The following minimum versions are required to build the library: * Visual Studio 14 2015 * Intel 2015 Update 1 -See [Platform-Specific Build Instructions](#platform-specific-build-instructions). +See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md). ## Installation This describes the installation process using cmake. As pre-requisites, you'll need git and cmake installed. -_See [dependencies.md](dependencies.md) for more details regarding supported +_See [dependencies.md](docs/dependencies.md) for more details regarding supported versions of build tools._ ```bash # Check out the library. $ git clone https://github.com/google/benchmark.git -# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. -$ git clone https://github.com/google/googletest.git benchmark/googletest # Go to the library root directory $ cd benchmark # Make a build directory to place the build output. $ cmake -E make_directory "build" -# Generate build system files with cmake. -$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../ +# Generate build system files with cmake, and download any dependencies. +$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../ # or, starting with CMake 3.13, use a simpler form: # cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" # Build the library. @@ -105,10 +111,10 @@ sudo cmake --build "build" --config Release --target install Note that Google Benchmark requires Google Test to build and run the tests. This dependency can be provided two ways: -* Checkout the Google Test sources into `benchmark/googletest` as above. +* Checkout the Google Test sources into `benchmark/googletest`. * Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during - configuration, the library will automatically download and build any required - dependencies. + configuration as above, the library will automatically download and build + any required dependencies. If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` to `CMAKE_ARGS`. @@ -187,7 +193,7 @@ Alternatively, link against the `benchmark_main` library and remove `BENCHMARK_MAIN();` above to get the same behavior. The compiled executable will run all benchmarks by default. Pass the `--help` -flag for option information or see the guide below. +flag for option information or see the [User Guide](docs/user_guide.md). ### Usage with CMake @@ -208,1111 +214,3 @@ Either way, link to the library as follows. ```cmake target_link_libraries(MyTarget benchmark::benchmark) ``` - -## Platform Specific Build Instructions - -### Building with GCC - -When the library is built using GCC it is necessary to link with the pthread -library due to how GCC implements `std::thread`. Failing to link to pthread will -lead to runtime exceptions (unless you're using libc++), not linker errors. See -[issue #67](https://github.com/google/benchmark/issues/67) for more details. You -can link to pthread by adding `-pthread` to your linker command. Note, you can -also use `-lpthread`, but there are potential issues with ordering of command -line parameters if you use that. - -### Building with Visual Studio 2015 or 2017 - -The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: - -``` -// Alternatively, can add libraries using linker options. -#ifdef _WIN32 -#pragma comment ( lib, "Shlwapi.lib" ) -#ifdef _DEBUG -#pragma comment ( lib, "benchmarkd.lib" ) -#else -#pragma comment ( lib, "benchmark.lib" ) -#endif -#endif -``` - -Can also use the graphical version of CMake: -* Open `CMake GUI`. -* Under `Where to build the binaries`, same path as source plus `build`. -* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. -* Click `Configure`, `Generate`, `Open Project`. -* If build fails, try deleting entire directory and starting again, or unticking options to build less. - -### Building with Intel 2015 Update 1 or Intel System Studio Update 4 - -See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. - -### Building on Solaris - -If you're running benchmarks on solaris, you'll want the kstat library linked in -too (`-lkstat`). - -## User Guide - -### Command Line - -[Output Formats](#output-formats) - -[Output Files](#output-files) - -[Running Benchmarks](#running-benchmarks) - -[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) - -[Result Comparison](#result-comparison) - -### Library - -[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) - -[Passing Arguments](#passing-arguments) - -[Calculating Asymptotic Complexity](#asymptotic-complexity) - -[Templated Benchmarks](#templated-benchmarks) - -[Fixtures](#fixtures) - -[Custom Counters](#custom-counters) - -[Multithreaded Benchmarks](#multithreaded-benchmarks) - -[CPU Timers](#cpu-timers) - -[Manual Timing](#manual-timing) - -[Setting the Time Unit](#setting-the-time-unit) - -[Preventing Optimization](#preventing-optimization) - -[Reporting Statistics](#reporting-statistics) - -[Custom Statistics](#custom-statistics) - -[Using RegisterBenchmark](#using-register-benchmark) - -[Exiting with an Error](#exiting-with-an-error) - -[A Faster KeepRunning Loop](#a-faster-keep-running-loop) - -[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) - - - - -### Output Formats - -The library supports multiple output formats. Use the -`--benchmark_format=` flag (or set the -`BENCHMARK_FORMAT=` environment variable) to set -the format type. `console` is the default format. - -The Console format is intended to be a human readable format. By default -the format generates color output. Context is output on stderr and the -tabular data on stdout. Example tabular output looks like: - -``` -Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- -BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s -BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s -BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s -``` - -The JSON format outputs human readable json split into two top level attributes. -The `context` attribute contains information about the run in general, including -information about the CPU and the date. -The `benchmarks` attribute contains a list of every benchmark run. Example json -output looks like: - -```json -{ - "context": { - "date": "2015/03/17-18:40:25", - "num_cpus": 40, - "mhz_per_cpu": 2801, - "cpu_scaling_enabled": false, - "build_type": "debug" - }, - "benchmarks": [ - { - "name": "BM_SetInsert/1024/1", - "iterations": 94877, - "real_time": 29275, - "cpu_time": 29836, - "bytes_per_second": 134066, - "items_per_second": 33516 - }, - { - "name": "BM_SetInsert/1024/8", - "iterations": 21609, - "real_time": 32317, - "cpu_time": 32429, - "bytes_per_second": 986770, - "items_per_second": 246693 - }, - { - "name": "BM_SetInsert/1024/10", - "iterations": 21393, - "real_time": 32724, - "cpu_time": 33355, - "bytes_per_second": 1199226, - "items_per_second": 299807 - } - ] -} -``` - -The CSV format outputs comma-separated values. The `context` is output on stderr -and the CSV itself on stdout. Example CSV output looks like: - -``` -name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label -"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, -"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, -"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, -``` - - - -### Output Files - -Write benchmark results to a file with the `--benchmark_out=` option -(or set `BENCHMARK_OUT`). Specify the output format with -`--benchmark_out_format={json|console|csv}` (or set -`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that specifying -`--benchmark_out` does not suppress the console output. - - - -### Running Benchmarks - -Benchmarks are executed by running the produced binaries. Benchmarks binaries, -by default, accept options that may be specified either through their command -line interface or by setting environment variables before execution. For every -`--option_flag=` CLI switch, a corresponding environment variable -`OPTION_FLAG=` exist and is used as default if set (CLI switches always - prevails). A complete list of CLI options is available running benchmarks - with the `--help` switch. - - - -### Running a Subset of Benchmarks - -The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` -environment variable) can be used to only run the benchmarks that match -the specified ``. For example: - -```bash -$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 -Run on (1 X 2300 MHz CPU ) -2016-06-25 19:34:24 -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -BM_memcpy/32 12 ns 12 ns 54687500 -BM_memcpy/32k 1834 ns 1837 ns 357143 -``` - - - -### Result comparison - -It is possible to compare the benchmarking results. -See [Additional Tooling Documentation](docs/tools.md) - - - -### Runtime and Reporting Considerations - -When the benchmark binary is executed, each benchmark function is run serially. -The number of iterations to run is determined dynamically by running the -benchmark a few times and measuring the time taken and ensuring that the -ultimate result will be statistically stable. As such, faster benchmark -functions will be run for more iterations than slower benchmark functions, and -the number of iterations is thus reported. - -In all cases, the number of iterations for which the benchmark is run is -governed by the amount of time the benchmark takes. Concretely, the number of -iterations is at least one, not more than 1e9, until CPU time is greater than -the minimum time, or the wallclock time is 5x minimum time. The minimum time is -set per benchmark by calling `MinTime` on the registered benchmark object. - -Average timings are then reported over the iterations run. If multiple -repetitions are requested using the `--benchmark_repetitions` command-line -option, or at registration time, the benchmark function will be run several -times and statistical results across these repetitions will also be reported. - -As well as the per-benchmark entries, a preamble in the report will include -information about the machine on which the benchmarks are run. - - - -### Passing Arguments - -Sometimes a family of benchmarks can be implemented with just one routine that -takes an extra argument to specify which one of the family of benchmarks to -run. For example, the following code defines a family of benchmarks for -measuring the speed of `memcpy()` calls of different lengths: - -```c++ -static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range(0)]; - char* dst = new char[state.range(0)]; - memset(src, 'x', state.range(0)); - for (auto _ : state) - memcpy(dst, src, state.range(0)); - state.SetBytesProcessed(int64_t(state.iterations()) * - int64_t(state.range(0))); - delete[] src; - delete[] dst; -} -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following invocation will pick a few appropriate arguments in -the specified range and will generate a benchmark for each such argument. - -```c++ -BENCHMARK(BM_memcpy)->Range(8, 8<<10); -``` - -By default the arguments in the range are generated in multiples of eight and -the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the -range multiplier is changed to multiples of two. - -```c++ -BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); -``` - -Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. - -The preceding code shows a method of defining a sparse range. The following -example shows a method of defining a dense range. It is then used to benchmark -the performance of `std::vector` initialization for uniformly increasing sizes. - -```c++ -static void BM_DenseRange(benchmark::State& state) { - for(auto _ : state) { - std::vector v(state.range(0), state.range(0)); - benchmark::DoNotOptimize(v.data()); - benchmark::ClobberMemory(); - } -} -BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); -``` - -Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. - -You might have a benchmark that depends on two or more inputs. For example, the -following code defines a family of benchmarks for measuring the speed of set -insertion. - -```c++ -static void BM_SetInsert(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 128}) - ->Args({2<<10, 128}) - ->Args({4<<10, 128}) - ->Args({8<<10, 128}) - ->Args({1<<10, 512}) - ->Args({2<<10, 512}) - ->Args({4<<10, 512}) - ->Args({8<<10, 512}); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following macro will pick a few appropriate arguments in the -product of the two specified ranges and will generate a benchmark for each such -pair. - -```c++ -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - -Some benchmarks may require specific argument values that cannot be expressed -with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a -benchmark input for each combination in the product of the supplied vectors. - -```c++ -BENCHMARK(BM_SetInsert) - ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) -// would generate the same benchmark arguments as -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 20}) - ->Args({3<<10, 20}) - ->Args({8<<10, 20}) - ->Args({3<<10, 40}) - ->Args({8<<10, 40}) - ->Args({1<<10, 40}) - ->Args({1<<10, 60}) - ->Args({3<<10, 60}) - ->Args({8<<10, 60}) - ->Args({1<<10, 80}) - ->Args({3<<10, 80}) - ->Args({8<<10, 80}); -``` - -For more complex patterns of inputs, passing a custom function to `Apply` allows -programmatic specification of an arbitrary set of arguments on which to run the -benchmark. The following example enumerates a dense range on one parameter, -and a sparse range on the second. - -```c++ -static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 0; i <= 10; ++i) - for (int j = 32; j <= 1024*1024; j *= 8) - b->Args({i, j}); -} -BENCHMARK(BM_SetInsert)->Apply(CustomArguments); -``` - -#### Passing Arbitrary Arguments to a Benchmark - -In C++11 it is possible to define a benchmark that takes an arbitrary number -of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` -macro creates a benchmark that invokes `func` with the `benchmark::State` as -the first argument followed by the specified `args...`. -The `test_case_name` is appended to the name of the benchmark and -should describe the values passed. - -```c++ -template -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] -} -// Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. -BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -``` - -Note that elements of `...args` may refer to global variables. Users should -avoid modifying global state inside of a benchmark. - - - -### Calculating Asymptotic Complexity (Big O) - -Asymptotic complexity might be calculated for a family of benchmarks. The -following code will calculate the coefficient for the high-order term in the -running time and the normalized root-mean square error of string comparison. - -```c++ -static void BM_StringCompare(benchmark::State& state) { - std::string s1(state.range(0), '-'); - std::string s2(state.range(0), '-'); - for (auto _ : state) { - benchmark::DoNotOptimize(s1.compare(s2)); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); -``` - -As shown in the following invocation, asymptotic complexity might also be -calculated automatically. - -```c++ -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); -``` - -The following code will specify asymptotic complexity with a lambda function, -that might be used to customize high-order term calculation. - -```c++ -BENCHMARK(BM_StringCompare)->RangeMultiplier(2) - ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); -``` - - - -### Templated Benchmarks - -This example produces and consumes messages of size `sizeof(v)` `range_x` -times. It also outputs throughput in the absence of multiprogramming. - -```c++ -template void BM_Sequential(benchmark::State& state) { - Q q; - typename Q::value_type v; - for (auto _ : state) { - for (int i = state.range(0); i--; ) - q.push(v); - for (int e = state.range(0); e--; ) - q.Wait(&v); - } - // actually messages, not bytes: - state.SetBytesProcessed( - static_cast(state.iterations())*state.range(0)); -} -BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -``` - -Three macros are provided for adding benchmark templates. - -```c++ -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. -#else // C++ < C++11 -#define BENCHMARK_TEMPLATE(func, arg1) -#endif -#define BENCHMARK_TEMPLATE1(func, arg1) -#define BENCHMARK_TEMPLATE2(func, arg1, arg2) -``` - - - -### Fixtures - -Fixture tests are created by first defining a type that derives from -`::benchmark::Fixture` and then creating/registering the tests using the -following macros: - -* `BENCHMARK_F(ClassName, Method)` -* `BENCHMARK_DEFINE_F(ClassName, Method)` -* `BENCHMARK_REGISTER_F(ClassName, Method)` - -For Example: - -```c++ -class MyFixture : public benchmark::Fixture { -public: - void SetUp(const ::benchmark::State& state) { - } - - void TearDown(const ::benchmark::State& state) { - } -}; - -BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} -/* BarTest is NOT registered */ -BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); -/* BarTest is now registered */ -``` - -#### Templated Fixtures - -Also you can create templated fixture by using the following macros: - -* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` -* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` - -For example: - -```c++ -template -class MyFixture : public benchmark::Fixture {}; - -BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); -``` - - - -### Custom Counters - -You can add your own counters with user-defined names. The example below -will add columns "Foo", "Bar" and "Baz" in its output: - -```c++ -static void UserCountersExample1(benchmark::State& state) { - double numFoos = 0, numBars = 0, numBazs = 0; - for (auto _ : state) { - // ... count Foo,Bar,Baz events - } - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -} -``` - -The `state.counters` object is a `std::map` with `std::string` keys -and `Counter` values. The latter is a `double`-like class, via an implicit -conversion to `double&`. Thus you can use all of the standard arithmetic -assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. - -In multithreaded benchmarks, each counter is set on the calling thread only. -When the benchmark finishes, the counters from each thread will be summed; -the resulting sum is the value which will be shown for the benchmark. - -The `Counter` constructor accepts three parameters: the value as a `double` -; a bit flag which allows you to show counters as rates, and/or as per-thread -iteration, and/or as per-thread averages, and/or iteration invariants, -and/or finally inverting the result; and a flag specifying the 'unit' - i.e. -is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 -(`benchmark::Counter::OneK::kIs1024`)? - -```c++ - // sets a simple counter - state.counters["Foo"] = numFoos; - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark. - // Meaning: per one second, how many 'foo's are processed? - state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark, and the result inverted. - // Meaning: how many seconds it takes to process one 'foo'? - state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); - - // Set the counter as a thread-average quantity. It will - // be presented divided by the number of threads. - state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); - - // There's also a combined flag: - state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); - - // This says that we process with the rate of state.range(0) bytes every iteration: - state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); -``` - -When you're compiling in C++11 mode or later you can use `insert()` with -`std::initializer_list`: - -```c++ - // With C++11, this can be done: - state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); - // ... instead of: - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -``` - -#### Counter Reporting - -When using the console reporter, by default, user counters are printed at -the end after the table, the same way as ``bytes_processed`` and -``items_processed``. This is best for cases in which there are few counters, -or where there are only a couple of lines per benchmark. Here's an example of -the default output: - -``` ------------------------------------------------------------------------------- -Benchmark Time CPU Iterations UserCounters... ------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m -BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 -BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 -BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 -BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 -BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 -BM_Factorial 26 ns 26 ns 26608979 40320 -BM_Factorial/real_time 26 ns 26 ns 26587936 40320 -BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 -BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 -BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 -BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 -``` - -If this doesn't suit you, you can print each counter as a table column by -passing the flag `--benchmark_counters_tabular=true` to the benchmark -application. This is best for cases in which there are a lot of counters, or -a lot of lines per individual benchmark. Note that this will trigger a -reprinting of the table header any time the counter set changes between -individual benchmarks. Here's an example of corresponding output when -`--benchmark_counters_tabular=true` is passed: - -``` ---------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Bar Bat Baz Foo ---------------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 -BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 -BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 -BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 -BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 -BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 -BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 -BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 --------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------- -BM_Factorial 26 ns 26 ns 26392245 40320 -BM_Factorial/real_time 26 ns 26 ns 26494107 40320 -BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 -BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 -BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 -BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 -BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 -BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 -BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 -BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 -BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 -``` - -Note above the additional header printed when the benchmark changes from -``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does -not have the same counter set as ``BM_UserCounter``. - - - -### Multithreaded Benchmarks - -In a multithreaded test (benchmark invoked by multiple threads simultaneously), -it is guaranteed that none of the threads will start until all have reached -the start of the benchmark loop, and all will have finished before any thread -exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` -API) As such, any global setup or teardown can be wrapped in a check against the thread -index: - -```c++ -static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } - if (state.thread_index == 0) { - // Teardown code here. - } -} -BENCHMARK(BM_MultiThreaded)->Threads(2); -``` - -If the benchmarked code itself uses threads and you want to compare it to -single-threaded code, you may want to use real-time ("wallclock") measurements -for latency comparisons: - -```c++ -BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); -``` - -Without `UseRealTime`, CPU time is used by default. - - - -### CPU Timers - -By default, the CPU timer only measures the time spent by the main thread. -If the benchmark itself uses threads internally, this measurement may not -be what you are looking for. Instead, there is a way to measure the total -CPU usage of the process, by all the threads. - -```c++ -void callee(int i); - -static void MyMain(int size) { -#pragma omp parallel for - for(int i = 0; i < size; i++) - callee(i); -} - -static void BM_OpenMP(benchmark::State& state) { - for (auto _ : state) - MyMain(state.range(0)); -} - -// Measure the time spent by the main thread, use it to decide for how long to -// run the benchmark loop. Depending on the internal implementation detail may -// measure to anywhere from near-zero (the overhead spent before/after work -// handoff to worker thread[s]) to the whole single-thread time. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10); - -// Measure the user-visible time, the wall clock (literally, the time that -// has passed on the clock on the wall), use it to decide for how long to -// run the benchmark loop. This will always be meaningful, an will match the -// time spent by the main thread in single-threaded case, in general decreasing -// with the number of internal threads doing the work. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); - -// Measure the total CPU consumption, use it to decide for how long to -// run the benchmark loop. This will always measure to no less than the -// time spent by the main thread in single-threaded case. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); - -// A mixture of the last two. Measure the total CPU consumption, but use the -// wall clock to decide for how long to run the benchmark loop. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); -``` - -#### Controlling Timers - -Normally, the entire duration of the work loop (`for (auto _ : state) {}`) -is measured. But sometimes, it is necessary to do some work inside of -that loop, every iteration, but without counting that time to the benchmark time. -That is possible, although it is not recommended, since it has high overhead. - -```c++ -static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); // Stop timers. They will not count until they are resumed. - data = ConstructRandomSet(state.range(0)); // Do something that should not be measured - state.ResumeTiming(); // And resume timers. They are now counting again. - // The rest will be measured. - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - - - -### Manual Timing - -For benchmarking something for which neither CPU time nor real-time are -correct or accurate enough, completely manual timing is supported using -the `UseManualTime` function. - -When `UseManualTime` is used, the benchmarked code must call -`SetIterationTime` once per iteration of the benchmark loop to -report the manually measured time. - -An example use case for this is benchmarking GPU execution (e.g. OpenCL -or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot -be accurately measured using CPU time or real-time. Instead, they can be -measured accurately using a dedicated API, and these measurement results -can be reported back with `SetIterationTime`. - -```c++ -static void BM_ManualTiming(benchmark::State& state) { - int microseconds = state.range(0); - std::chrono::duration sleep_duration { - static_cast(microseconds) - }; - - for (auto _ : state) { - auto start = std::chrono::high_resolution_clock::now(); - // Simulate some useful workload with a sleep - std::this_thread::sleep_for(sleep_duration); - auto end = std::chrono::high_resolution_clock::now(); - - auto elapsed_seconds = - std::chrono::duration_cast>( - end - start); - - state.SetIterationTime(elapsed_seconds.count()); - } -} -BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); -``` - - - -### Setting the Time Unit - -If a benchmark runs a few milliseconds it may be hard to visually compare the -measured times, since the output data is given in nanoseconds per default. In -order to manually set the time unit, you can specify it manually: - -```c++ -BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); -``` - - - -### Preventing Optimization - -To prevent a value or expression from being optimized away by the compiler -the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` -functions can be used. - -```c++ -static void BM_test(benchmark::State& state) { - for (auto _ : state) { - int x = 0; - for (int i=0; i < 64; ++i) { - benchmark::DoNotOptimize(x += i); - } - } -} -``` - -`DoNotOptimize()` forces the *result* of `` to be stored in either -memory or a register. For GNU based compilers it acts as read/write barrier -for global memory. More specifically it forces the compiler to flush pending -writes to memory and reload any other values as necessary. - -Note that `DoNotOptimize()` does not prevent optimizations on `` -in any way. `` may even be removed entirely when the result is already -known. For example: - -```c++ - /* Example 1: `` is removed entirely. */ - int foo(int x) { return x + 42; } - while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); - - /* Example 2: Result of '' is only reused */ - int bar(int) __attribute__((const)); - while (...) DoNotOptimize(bar(0)); // Optimized to: - // int __result__ = bar(0); - // while (...) DoNotOptimize(__result__); -``` - -The second tool for preventing optimizations is `ClobberMemory()`. In essence -`ClobberMemory()` forces the compiler to perform all pending writes to global -memory. Memory managed by block scope objects must be "escaped" using -`DoNotOptimize(...)` before it can be clobbered. In the below example -`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized -away. - -```c++ -static void BM_vector_push_back(benchmark::State& state) { - for (auto _ : state) { - std::vector v; - v.reserve(1); - benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. - v.push_back(42); - benchmark::ClobberMemory(); // Force 42 to be written to memory. - } -} -``` - -Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. - - - -### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks - -By default each benchmark is run once and that single result is reported. -However benchmarks are often noisy and a single result may not be representative -of the overall behavior. For this reason it's possible to repeatedly rerun the -benchmark. - -The number of runs of each benchmark is specified globally by the -`--benchmark_repetitions` flag or on a per benchmark basis by calling -`Repetitions` on the registered benchmark object. When a benchmark is run more -than once the mean, median and standard deviation of the runs will be reported. - -Additionally the `--benchmark_report_aggregates_only={true|false}`, -`--benchmark_display_aggregates_only={true|false}` flags or -`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be -used to change how repeated tests are reported. By default the result of each -repeated run is reported. When `report aggregates only` option is `true`, -only the aggregates (i.e. mean, median and standard deviation, maybe complexity -measurements if they were requested) of the runs is reported, to both the -reporters - standard output (console), and the file. -However when only the `display aggregates only` option is `true`, -only the aggregates are displayed in the standard output, while the file -output still contains everything. -Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a -registered benchmark object overrides the value of the appropriate flag for that -benchmark. - - - -### Custom Statistics - -While having mean, median and standard deviation is nice, this may not be -enough for everyone. For example you may want to know what the largest -observation is, e.g. because you have some real-time constraints. This is easy. -The following code will specify a custom statistic to be calculated, defined -by a lambda function. - -```c++ -void BM_spin_empty(benchmark::State& state) { - for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK(BM_spin_empty) - ->ComputeStatistics("max", [](const std::vector& v) -> double { - return *(std::max_element(std::begin(v), std::end(v))); - }) - ->Arg(512); -``` - - - -### Using RegisterBenchmark(name, fn, args...) - -The `RegisterBenchmark(name, func, args...)` function provides an alternative -way to create and register benchmarks. -`RegisterBenchmark(name, func, args...)` creates, registers, and returns a -pointer to a new benchmark with the specified `name` that invokes -`func(st, args...)` where `st` is a `benchmark::State` object. - -Unlike the `BENCHMARK` registration macros, which can only be used at the global -scope, the `RegisterBenchmark` can be called anywhere. This allows for -benchmark tests to be registered programmatically. - -Additionally `RegisterBenchmark` allows any callable object to be registered -as a benchmark. Including capturing lambdas and function objects. - -For Example: -```c++ -auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; - -int main(int argc, char** argv) { - for (auto& test_input : { /* ... */ }) - benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); -} -``` - - - -### Exiting with an Error - -When errors caused by external influences, such as file I/O and network -communication, occur within a benchmark the -`State::SkipWithError(const char* msg)` function can be used to skip that run -of benchmark and report the error. Note that only future iterations of the -`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop -Users must explicitly exit the loop, otherwise all iterations will be performed. -Users may explicitly return to exit the benchmark immediately. - -The `SkipWithError(...)` function may be used at any point within the benchmark, -including before and after the benchmark loop. Moreover, if `SkipWithError(...)` -has been used, it is not required to reach the benchmark loop and one may return -from the benchmark function early. - -For example: - -```c++ -static void BM_test(benchmark::State& state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - // KeepRunning() loop will not be entered. - } - while (state.KeepRunning()) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // Needed to skip the rest of the iteration. - } - do_stuff(data); - } -} - -static void BM_test_ranged_fo(benchmark::State & state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - return; // Early return is allowed when SkipWithError() has been used. - } - for (auto _ : state) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // REQUIRED to prevent all further iterations. - } - do_stuff(data); - } -} -``` - - -### A Faster KeepRunning Loop - -In C++11 mode, a ranged-based for loop should be used in preference to -the `KeepRunning` loop for running the benchmarks. For example: - -```c++ -static void BM_Fast(benchmark::State &state) { - for (auto _ : state) { - FastOperation(); - } -} -BENCHMARK(BM_Fast); -``` - -The reason the ranged-for loop is faster than using `KeepRunning`, is -because `KeepRunning` requires a memory load and store of the iteration count -ever iteration, whereas the ranged-for variant is able to keep the iteration count -in a register. - -For example, an empty inner loop of using the ranged-based for method looks like: - -```asm -# Loop Init - mov rbx, qword ptr [r14 + 104] - call benchmark::State::StartKeepRunning() - test rbx, rbx - je .LoopEnd -.LoopHeader: # =>This Inner Loop Header: Depth=1 - add rbx, -1 - jne .LoopHeader -.LoopEnd: -``` - -Compared to an empty `KeepRunning` loop, which looks like: - -```asm -.LoopHeader: # in Loop: Header=BB0_3 Depth=1 - cmp byte ptr [rbx], 1 - jne .LoopInit -.LoopBody: # =>This Inner Loop Header: Depth=1 - mov rax, qword ptr [rbx + 8] - lea rcx, [rax + 1] - mov qword ptr [rbx + 8], rcx - cmp rax, qword ptr [rbx + 104] - jb .LoopHeader - jmp .LoopEnd -.LoopInit: - mov rdi, rbx - call benchmark::State::StartKeepRunning() - jmp .LoopBody -.LoopEnd: -``` - -Unless C++03 compatibility is required, the ranged-for variant of writing -the benchmark loop should be preferred. - - - -### Disabling CPU Frequency Scaling - -If you see this error: - -``` -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -``` - -you might want to disable the CPU frequency scaling while running the benchmark: - -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` diff --git a/WORKSPACE b/WORKSPACE index c00d12c..949eb98 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,12 +1,7 @@ workspace(name = "com_github_google_benchmark") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "rules_cc", - strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912", - urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"], -) +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") http_archive( name = "com_google_absl", @@ -15,10 +10,10 @@ http_archive( urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], ) -http_archive( +git_repository( name = "com_google_googletest", - strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e", - urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"], + remote = "https://github.com/google/googletest.git", + tag = "release-1.11.0", ) http_archive( @@ -34,3 +29,16 @@ new_local_repository( build_file = "@//bindings/python:python_headers.BUILD", path = "/usr/include/python3.6", # May be overwritten by setup.py. ) + +http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", +) + +load("@rules_python//python:pip.bzl", pip3_install="pip_install") + +pip3_install( + name = "py_deps", + requirements = "//:requirements.txt", +) diff --git a/_config.yml b/_config.yml index 1885487..1fa5ff8 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1,2 @@ -theme: jekyll-theme-midnight \ No newline at end of file +theme: jekyll-theme-midnight +markdown: GFM diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py index 787c423..ec651c1 100644 --- a/bindings/python/google_benchmark/__init__.py +++ b/bindings/python/google_benchmark/__init__.py @@ -34,6 +34,7 @@ from google_benchmark._benchmark import ( kNanosecond, kMicrosecond, kMillisecond, + kSecond, oNone, o1, oN, @@ -53,6 +54,7 @@ __all__ = [ "kNanosecond", "kMicrosecond", "kMillisecond", + "kSecond", "oNone", "o1", "oN", @@ -64,7 +66,7 @@ __all__ = [ "oLambda", ] -__version__ = "0.2.0" +__version__ = "1.6.1" class __OptionMaker: @@ -108,7 +110,7 @@ class __OptionMaker: # Alias for nicer API. -# We have to instanciate an object, even if stateless, to be able to use __getattr__ +# We have to instantiate an object, even if stateless, to be able to use __getattr__ # on option.range option = __OptionMaker() diff --git a/bindings/python/google_benchmark/benchmark.cc b/bindings/python/google_benchmark/benchmark.cc index a733339..02b6ed7 100644 --- a/bindings/python/google_benchmark/benchmark.cc +++ b/bindings/python/google_benchmark/benchmark.cc @@ -49,6 +49,7 @@ PYBIND11_MODULE(_benchmark, m) { .value("kNanosecond", TimeUnit::kNanosecond) .value("kMicrosecond", TimeUnit::kMicrosecond) .value("kMillisecond", TimeUnit::kMillisecond) + .value("kSecond", TimeUnit::kSecond) .export_values(); using benchmark::BigO; @@ -156,7 +157,7 @@ PYBIND11_MODULE(_benchmark, m) { .def("pause_timing", &State::PauseTiming) .def("resume_timing", &State::ResumeTiming) .def("skip_with_error", &State::SkipWithError) - .def_property_readonly("error_occured", &State::error_occurred) + .def_property_readonly("error_occurred", &State::error_occurred) .def("set_iteration_time", &State::SetIterationTime) .def_property("bytes_processed", &State::bytes_processed, &State::SetBytesProcessed) @@ -164,12 +165,12 @@ PYBIND11_MODULE(_benchmark, m) { &State::SetComplexityN) .def_property("items_processed", &State::items_processed, &State::SetItemsProcessed) - .def("set_label", (void (State::*)(const char*)) & State::SetLabel) + .def("set_label", (void(State::*)(const char*)) & State::SetLabel) .def("range", &State::range, py::arg("pos") = 0) .def_property_readonly("iterations", &State::iterations) .def_readwrite("counters", &State::counters) - .def_readonly("thread_index", &State::thread_index) - .def_readonly("threads", &State::threads); + .def_property_readonly("thread_index", &State::thread_index) + .def_property_readonly("threads", &State::threads); m.def("Initialize", Initialize); m.def("RegisterBenchmark", RegisterBenchmark, diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py index 9134e8c..487acc9 100644 --- a/bindings/python/google_benchmark/example.py +++ b/bindings/python/google_benchmark/example.py @@ -102,7 +102,7 @@ def with_options(state): @benchmark.register(name="sum_million_microseconds") @benchmark.option.unit(benchmark.kMicrosecond) -def with_options(state): +def with_options2(state): while state: sum(range(1_000_000)) diff --git a/cmake/AddCXXCompilerFlag.cmake b/cmake/AddCXXCompilerFlag.cmake index d0d2099..858589e 100644 --- a/cmake/AddCXXCompilerFlag.cmake +++ b/cmake/AddCXXCompilerFlag.cmake @@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG) check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") if(${MANGLED_FLAG}) - set(VARIANT ${ARGV1}) - if(ARGV1) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") endif() set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) endif() @@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG) check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") if(${MANGLED_FLAG}) - set(VARIANT ${ARGV1}) - if(ARGV1) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") endif() set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 6e9256e..2e15f0c 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -1 +1,7 @@ +@PACKAGE_INIT@ + +include (CMakeFindDependencyMacro) + +find_dependency (Threads) + include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") diff --git a/cmake/GetGitVersion.cmake b/cmake/GetGitVersion.cmake index 4f10f22..04a1f9b 100644 --- a/cmake/GetGitVersion.cmake +++ b/cmake/GetGitVersion.cmake @@ -20,16 +20,20 @@ set(__get_git_version INCLUDED) function(get_git_version var) if(GIT_EXECUTABLE) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} RESULT_VARIABLE status - OUTPUT_VARIABLE GIT_VERSION + OUTPUT_VARIABLE GIT_DESCRIBE_VERSION ERROR_QUIET) - if(${status}) - set(GIT_VERSION "v0.0.0") + if(status) + set(GIT_DESCRIBE_VERSION "v0.0.0") + endif() + + string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) + if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) + string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION}) else() - string(STRIP ${GIT_VERSION} GIT_VERSION) - string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION}) + string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) endif() # Work out if the repository is dirty @@ -43,12 +47,12 @@ function(get_git_version var) ERROR_QUIET) string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) if (${GIT_DIRTY}) - set(GIT_VERSION "${GIT_VERSION}-dirty") + set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") endif() + message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") else() - set(GIT_VERSION "v0.0.0") + set(GIT_VERSION "0.0.0") endif() - message(STATUS "git Version: ${GIT_VERSION}") set(${var} ${GIT_VERSION} PARENT_SCOPE) endfunction() diff --git a/cmake/GoogleTest.cmake b/cmake/GoogleTest.cmake index dd611fc..66cb910 100644 --- a/cmake/GoogleTest.cmake +++ b/cmake/GoogleTest.cmake @@ -29,13 +29,20 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) +# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. +add_compile_options(-w) + # Add googletest directly to our build. This defines # the gtest and gtest_main targets. add_subdirectory(${GOOGLETEST_SOURCE_DIR} ${GOOGLETEST_BINARY_DIR} EXCLUDE_FROM_ALL) -set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) +if(NOT DEFINED GTEST_COMPILE_COMMANDS) + set(GTEST_COMPILE_COMMANDS ON) +endif() + +set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) diff --git a/cmake/GoogleTest.cmake.in b/cmake/GoogleTest.cmake.in index fd957ff..ce653ac 100644 --- a/cmake/GoogleTest.cmake.in +++ b/cmake/GoogleTest.cmake.in @@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" ) else() if(NOT ALLOW_DOWNLOADING_GOOGLETEST) - message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + return() else() message(WARNING "Did not find Google Test sources! Fetching from web...") ExternalProject_Add( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG master + GIT_TAG "release-1.11.0" PREFIX "${CMAKE_BINARY_DIR}" STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" diff --git a/cmake/benchmark.pc.in b/cmake/benchmark.pc.in index 43ca8f9..34beb01 100644 --- a/cmake/benchmark.pc.in +++ b/cmake/benchmark.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} -libdir=${prefix}/lib -includedir=${prefix}/include +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework diff --git a/conan/CMakeLists.txt b/conan/CMakeLists.txt deleted file mode 100644 index 15b92ca..0000000 --- a/conan/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -cmake_minimum_required(VERSION 2.8.11) -project(cmake_wrapper) - -include(conanbuildinfo.cmake) -conan_basic_setup() - -include(${CMAKE_SOURCE_DIR}/CMakeListsOriginal.txt) diff --git a/conan/test_package/CMakeLists.txt b/conan/test_package/CMakeLists.txt deleted file mode 100644 index 089a6c7..0000000 --- a/conan/test_package/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 2.8.11) -project(test_package) - -set(CMAKE_VERBOSE_MAKEFILE TRUE) - -include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) -conan_basic_setup() - -add_executable(${PROJECT_NAME} test_package.cpp) -target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS}) diff --git a/conan/test_package/conanfile.py b/conan/test_package/conanfile.py deleted file mode 100644 index d63f408..0000000 --- a/conan/test_package/conanfile.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from conans import ConanFile, CMake -import os - - -class TestPackageConan(ConanFile): - settings = "os", "compiler", "build_type", "arch" - generators = "cmake" - - def build(self): - cmake = CMake(self) - cmake.configure() - cmake.build() - - def test(self): - bin_path = os.path.join("bin", "test_package") - self.run(bin_path, run_environment=True) diff --git a/conan/test_package/test_package.cpp b/conan/test_package/test_package.cpp deleted file mode 100644 index 4fa7ec0..0000000 --- a/conan/test_package/test_package.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "benchmark/benchmark.h" - -void BM_StringCreation(benchmark::State& state) { - while (state.KeepRunning()) - std::string empty_string; -} - -BENCHMARK(BM_StringCreation); - -void BM_StringCopy(benchmark::State& state) { - std::string x = "hello"; - while (state.KeepRunning()) - std::string copy(x); -} - -BENCHMARK(BM_StringCopy); - -BENCHMARK_MAIN(); diff --git a/conanfile.py b/conanfile.py deleted file mode 100644 index e31fc52..0000000 --- a/conanfile.py +++ /dev/null @@ -1,79 +0,0 @@ -from conans import ConanFile, CMake, tools -from conans.errors import ConanInvalidConfiguration -import shutil -import os - - -class GoogleBenchmarkConan(ConanFile): - name = "benchmark" - description = "A microbenchmark support library." - topics = ("conan", "benchmark", "google", "microbenchmark") - url = "https://github.com/google/benchmark" - homepage = "https://github.com/google/benchmark" - author = "Google Inc." - license = "Apache-2.0" - exports_sources = ["*"] - generators = "cmake" - - settings = "arch", "build_type", "compiler", "os" - options = { - "shared": [True, False], - "fPIC": [True, False], - "enable_lto": [True, False], - "enable_exceptions": [True, False] - } - default_options = {"shared": False, "fPIC": True, "enable_lto": False, "enable_exceptions": True} - - _build_subfolder = "." - - def source(self): - # Wrap the original CMake file to call conan_basic_setup - shutil.move("CMakeLists.txt", "CMakeListsOriginal.txt") - shutil.move(os.path.join("conan", "CMakeLists.txt"), "CMakeLists.txt") - - def config_options(self): - if self.settings.os == "Windows": - if self.settings.compiler == "Visual Studio" and float(self.settings.compiler.version.value) <= 12: - raise ConanInvalidConfiguration("{} {} does not support Visual Studio <= 12".format(self.name, self.version)) - del self.options.fPIC - - def configure(self): - if self.settings.os == "Windows" and self.options.shared: - raise ConanInvalidConfiguration("Windows shared builds are not supported right now, see issue #639") - - def _configure_cmake(self): - cmake = CMake(self) - - cmake.definitions["BENCHMARK_ENABLE_TESTING"] = "OFF" - cmake.definitions["BENCHMARK_ENABLE_GTEST_TESTS"] = "OFF" - cmake.definitions["BENCHMARK_ENABLE_LTO"] = "ON" if self.options.enable_lto else "OFF" - cmake.definitions["BENCHMARK_ENABLE_EXCEPTIONS"] = "ON" if self.options.enable_exceptions else "OFF" - - # See https://github.com/google/benchmark/pull/638 for Windows 32 build explanation - if self.settings.os != "Windows": - cmake.definitions["BENCHMARK_BUILD_32_BITS"] = "ON" if "64" not in str(self.settings.arch) else "OFF" - cmake.definitions["BENCHMARK_USE_LIBCXX"] = "ON" if (str(self.settings.compiler.libcxx) == "libc++") else "OFF" - else: - cmake.definitions["BENCHMARK_USE_LIBCXX"] = "OFF" - - cmake.configure(build_folder=self._build_subfolder) - return cmake - - def build(self): - cmake = self._configure_cmake() - cmake.build() - - def package(self): - cmake = self._configure_cmake() - cmake.install() - - self.copy(pattern="LICENSE", dst="licenses") - - def package_info(self): - self.cpp_info.libs = tools.collect_libs(self) - if self.settings.os == "Linux": - self.cpp_info.libs.extend(["pthread", "rt"]) - elif self.settings.os == "Windows": - self.cpp_info.libs.append("shlwapi") - elif self.settings.os == "SunOS": - self.cpp_info.libs.append("kstat") diff --git a/docs/_config.yml b/docs/_config.yml index 1885487..2f7efbe 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-midnight \ No newline at end of file +theme: jekyll-theme-minimal \ No newline at end of file diff --git a/docs/dependencies.md b/docs/dependencies.md new file mode 100644 index 0000000..7af52b9 --- /dev/null +++ b/docs/dependencies.md @@ -0,0 +1,19 @@ +# Build tool dependency policy + +To ensure the broadest compatibility when building the benchmark library, but +still allow forward progress, we require any build tooling to be available for: + +* Debian stable _and_ +* The last two Ubuntu LTS releases + +Currently, this means using build tool versions that are available for Ubuntu +18.04 (Bionic Beaver), Ubuntu 20.04 (Focal Fossa), and Debian 11 (bullseye). + +_Note, CI also runs ubuntu-16.04 and ubuntu-14.04 to ensure best effort support +for older versions._ + +## cmake +The current supported version is cmake 3.5.1 as of 2018-06-06. + +_Note, this version is also available for Ubuntu 14.04, an older Ubuntu LTS +release, as `cmake3`._ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..eb82eff --- /dev/null +++ b/docs/index.md @@ -0,0 +1,10 @@ +# Benchmark + +* [Assembly Tests](AssemblyTests.md) +* [Dependencies](dependencies.md) +* [Perf Counters](perf_counters.md) +* [Platform Specific Build Instructions](platform_specific_build_instructions.md) +* [Random Interleaving](random_interleaving.md) +* [Releasing](releasing.md) +* [Tools](tools.md) +* [User Guide](user_guide.md) \ No newline at end of file diff --git a/docs/perf_counters.md b/docs/perf_counters.md new file mode 100644 index 0000000..74560e9 --- /dev/null +++ b/docs/perf_counters.md @@ -0,0 +1,34 @@ + + +# User-Requested Performance Counters + +When running benchmarks, the user may choose to request collection of +performance counters. This may be useful in investigation scenarios - narrowing +down the cause of a regression; or verifying that the underlying cause of a +performance improvement matches expectations. + +This feature is available if: + +* The benchmark is run on an architecture featuring a Performance Monitoring + Unit (PMU), +* The benchmark is compiled with support for collecting counters. Currently, + this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build + time + +The feature does not require modifying benchmark code. Counter collection is +handled at the boundaries where timer collection is also handled. + +To opt-in: + +* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. +* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM. + +To use, pass a comma-separated list of counter names through the +`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, +they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are +mapped by libpfm to platform-specifics - see libpfm +[documentation](http://perfmon2.sourceforge.net/docs.html) for more details. + +The counter values are reported back through the [User Counters](../README.md#custom-counters) +mechanism, meaning, they are available in all the formats (e.g. JSON) supported +by User Counters. \ No newline at end of file diff --git a/docs/platform_specific_build_instructions.md b/docs/platform_specific_build_instructions.md new file mode 100644 index 0000000..2d5d6c4 --- /dev/null +++ b/docs/platform_specific_build_instructions.md @@ -0,0 +1,48 @@ +# Platform Specific Build Instructions + +## Building with GCC + +When the library is built using GCC it is necessary to link with the pthread +library due to how GCC implements `std::thread`. Failing to link to pthread will +lead to runtime exceptions (unless you're using libc++), not linker errors. See +[issue #67](https://github.com/google/benchmark/issues/67) for more details. You +can link to pthread by adding `-pthread` to your linker command. Note, you can +also use `-lpthread`, but there are potential issues with ordering of command +line parameters if you use that. + +On QNX, the pthread library is part of libc and usually included automatically +(see +[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)). +There's no separate pthread library to link. + +## Building with Visual Studio 2015 or 2017 + +The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: + +``` +// Alternatively, can add libraries using linker options. +#ifdef _WIN32 +#pragma comment ( lib, "Shlwapi.lib" ) +#ifdef _DEBUG +#pragma comment ( lib, "benchmarkd.lib" ) +#else +#pragma comment ( lib, "benchmark.lib" ) +#endif +#endif +``` + +Can also use the graphical version of CMake: +* Open `CMake GUI`. +* Under `Where to build the binaries`, same path as source plus `build`. +* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. +* Click `Configure`, `Generate`, `Open Project`. +* If build fails, try deleting entire directory and starting again, or unticking options to build less. + +## Building with Intel 2015 Update 1 or Intel System Studio Update 4 + +See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. + +## Building on Solaris + +If you're running benchmarks on solaris, you'll want the kstat library linked in +too (`-lkstat`). \ No newline at end of file diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md new file mode 100644 index 0000000..c083036 --- /dev/null +++ b/docs/random_interleaving.md @@ -0,0 +1,13 @@ + + +# Random Interleaving + +[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a +technique to lower run-to-run variance. It randomly interleaves repetitions of a +microbenchmark with repetitions from other microbenchmarks in the same benchmark +test. Data shows it is able to lower run-to-run variance by +[40%](https://github.com/google/benchmark/issues/1051) on average. + +To use, you mainly need to set `--benchmark_enable_random_interleaving=true`, +and optionally specify non-zero repetition count `--benchmark_repetitions=9` +and optionally decrease the per-repetition time `--benchmark_min_time=0.1`. diff --git a/docs/releasing.md b/docs/releasing.md index f0cd701..334f935 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -1,6 +1,6 @@ # How to release -* Make sure you're on master and synced to HEAD +* Make sure you're on main and synced to HEAD * Ensure the project builds and tests run (sanity check only, obviously) * `parallel -j0 exec ::: test/*_test` can help ensure everything at least passes @@ -8,9 +8,28 @@ * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. +* Create one last commit that updates the version saved in `CMakeLists.txt` and the + `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the release + version you're creating. (This version will be used if benchmark is installed from the + archive you'll be creating in the next step.) + +``` +project (benchmark VERSION 1.6.0 LANGUAGES CXX) +``` + +```python +# bindings/python/google_benchmark/__init__.py + +# ... + +__version__ = "1.6.0" # <-- change this to the release version you are creating + +# ... +``` + * Create a release through github's interface * Note this will create a lightweight tag. * Update this to an annotated tag: * `git pull --tags` * `git tag -a -f ` - * `git push --force origin` + * `git push --force --tags origin` diff --git a/docs/user_guide.md b/docs/user_guide.md new file mode 100644 index 0000000..34bea69 --- /dev/null +++ b/docs/user_guide.md @@ -0,0 +1,1200 @@ +# User Guide + +## Command Line + +[Output Formats](#output-formats) + +[Output Files](#output-files) + +[Running Benchmarks](#running-benchmarks) + +[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) + +[Result Comparison](#result-comparison) + +[Extra Context](#extra-context) + +## Library + +[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) + +[Setup/Teardown](#setupteardown) + +[Passing Arguments](#passing-arguments) + +[Custom Benchmark Name](#custom-benchmark-name) + +[Calculating Asymptotic Complexity](#asymptotic-complexity) + +[Templated Benchmarks](#templated-benchmarks) + +[Fixtures](#fixtures) + +[Custom Counters](#custom-counters) + +[Multithreaded Benchmarks](#multithreaded-benchmarks) + +[CPU Timers](#cpu-timers) + +[Manual Timing](#manual-timing) + +[Setting the Time Unit](#setting-the-time-unit) + +[Random Interleaving](random_interleaving.md) + +[User-Requested Performance Counters](perf_counters.md) + +[Preventing Optimization](#preventing-optimization) + +[Reporting Statistics](#reporting-statistics) + +[Custom Statistics](#custom-statistics) + +[Using RegisterBenchmark](#using-register-benchmark) + +[Exiting with an Error](#exiting-with-an-error) + +[A Faster KeepRunning Loop](#a-faster-keep-running-loop) + +[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) + + + + +## Output Formats + +The library supports multiple output formats. Use the +`--benchmark_format=` flag (or set the +`BENCHMARK_FORMAT=` environment variable) to set +the format type. `console` is the default format. + +The Console format is intended to be a human readable format. By default +the format generates color output. Context is output on stderr and the +tabular data on stdout. Example tabular output looks like: + +``` +Benchmark Time(ns) CPU(ns) Iterations +---------------------------------------------------------------------- +BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s +BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s +BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s +``` + +The JSON format outputs human readable json split into two top level attributes. +The `context` attribute contains information about the run in general, including +information about the CPU and the date. +The `benchmarks` attribute contains a list of every benchmark run. Example json +output looks like: + +```json +{ + "context": { + "date": "2015/03/17-18:40:25", + "num_cpus": 40, + "mhz_per_cpu": 2801, + "cpu_scaling_enabled": false, + "build_type": "debug" + }, + "benchmarks": [ + { + "name": "BM_SetInsert/1024/1", + "iterations": 94877, + "real_time": 29275, + "cpu_time": 29836, + "bytes_per_second": 134066, + "items_per_second": 33516 + }, + { + "name": "BM_SetInsert/1024/8", + "iterations": 21609, + "real_time": 32317, + "cpu_time": 32429, + "bytes_per_second": 986770, + "items_per_second": 246693 + }, + { + "name": "BM_SetInsert/1024/10", + "iterations": 21393, + "real_time": 32724, + "cpu_time": 33355, + "bytes_per_second": 1199226, + "items_per_second": 299807 + } + ] +} +``` + +The CSV format outputs comma-separated values. The `context` is output on stderr +and the CSV itself on stdout. Example CSV output looks like: + +``` +name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label +"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, +"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, +"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, +``` + + + +## Output Files + +Write benchmark results to a file with the `--benchmark_out=` option +(or set `BENCHMARK_OUT`). Specify the output format with +`--benchmark_out_format={json|console|csv}` (or set +`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv +parsers. + +Specifying `--benchmark_out` does not suppress the console output. + + + +## Running Benchmarks + +Benchmarks are executed by running the produced binaries. Benchmarks binaries, +by default, accept options that may be specified either through their command +line interface or by setting environment variables before execution. For every +`--option_flag=` CLI switch, a corresponding environment variable +`OPTION_FLAG=` exist and is used as default if set (CLI switches always + prevails). A complete list of CLI options is available running benchmarks + with the `--help` switch. + + + +## Running a Subset of Benchmarks + +The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` +environment variable) can be used to only run the benchmarks that match +the specified ``. For example: + +```bash +$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 +Run on (1 X 2300 MHz CPU ) +2016-06-25 19:34:24 +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +BM_memcpy/32 12 ns 12 ns 54687500 +BM_memcpy/32k 1834 ns 1837 ns 357143 +``` + + + +## Result comparison + +It is possible to compare the benchmarking results. +See [Additional Tooling Documentation](tools.md) + + + +## Extra Context + +Sometimes it's useful to add extra context to the content printed before the +results. By default this section includes information about the CPU on which +the benchmarks are running. If you do want to add more context, you can use +the `benchmark_context` command line flag: + +```bash +$ ./run_benchmarks --benchmark_context=pwd=`pwd` +Run on (1 x 2300 MHz CPU) +pwd: /home/user/benchmark/ +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +``` + +You can get the same effect with the API: + +```c++ + benchmark::AddCustomContext("foo", "bar"); +``` + +Note that attempts to add a second value with the same key will fail with an +error message. + + + +## Runtime and Reporting Considerations + +When the benchmark binary is executed, each benchmark function is run serially. +The number of iterations to run is determined dynamically by running the +benchmark a few times and measuring the time taken and ensuring that the +ultimate result will be statistically stable. As such, faster benchmark +functions will be run for more iterations than slower benchmark functions, and +the number of iterations is thus reported. + +In all cases, the number of iterations for which the benchmark is run is +governed by the amount of time the benchmark takes. Concretely, the number of +iterations is at least one, not more than 1e9, until CPU time is greater than +the minimum time, or the wallclock time is 5x minimum time. The minimum time is +set per benchmark by calling `MinTime` on the registered benchmark object. + +Average timings are then reported over the iterations run. If multiple +repetitions are requested using the `--benchmark_repetitions` command-line +option, or at registration time, the benchmark function will be run several +times and statistical results across these repetitions will also be reported. + +As well as the per-benchmark entries, a preamble in the report will include +information about the machine on which the benchmarks are run. + + + +## Setup/Teardown + +Global setup/teardown specific to each benchmark can be done by +passing a callback to Setup/Teardown: + +The setup/teardown callbacks will be invoked once for each benchmark. +If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before +each run with k threads. +If the benchmark uses different size groups of threads, the above will be true for each size group. + +Eg., + +```c++ +static void DoSetup(const benchmark::State& state) { +} + +static void DoTeardown(const benchmark::State& state) { +} + +static void BM_func(benchmark::State& state) {...} + +BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown); + +``` + +In this example, `DoSetup` and `DoTearDown` will be invoked 4 times each, +specifically, once for each of this family: + - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32 + - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32 + + + +## Passing Arguments + +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range(0)]; + char* dst = new char[state.range(0)]; + memset(src, 'x', state.range(0)); + for (auto _ : state) + memcpy(dst, src, state.range(0)); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range(0))); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` + +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +The preceding code shows a method of defining a sparse range. The following +example shows a method of defining a dense range. It is then used to benchmark +the performance of `std::vector` initialization for uniformly increasing sizes. + +```c++ +static void BM_DenseRange(benchmark::State& state) { + for(auto _ : state) { + std::vector v(state.range(0), state.range(0)); + benchmark::DoNotOptimize(v.data()); + benchmark::ClobberMemory(); + } +} +BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); +``` + +Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. + +You might have a benchmark that depends on two or more inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. + +```c++ +static void BM_SetInsert(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) + ->Args({1<<10, 512}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) + ->Args({8<<10, 512}); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair. + +{% raw %} +```c++ +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +{% endraw %} + +Some benchmarks may require specific argument values that cannot be expressed +with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a +benchmark input for each combination in the product of the supplied vectors. + +{% raw %} +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 20}) + ->Args({3<<10, 20}) + ->Args({8<<10, 20}) + ->Args({3<<10, 40}) + ->Args({8<<10, 40}) + ->Args({1<<10, 40}) + ->Args({1<<10, 60}) + ->Args({3<<10, 60}) + ->Args({8<<10, 60}) + ->Args({1<<10, 80}) + ->Args({3<<10, 80}) + ->Args({8<<10, 80}); +``` +{% endraw %} + +For the most common scenarios, helper methods for creating a list of +integers for a given sparse or dense range are provided. + +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + benchmark::CreateRange(8, 128, /*multi=*/2), + benchmark::CreateDenseRange(1, 4, /*step=*/1) + }) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + {8, 16, 32, 64, 128}, + {1, 2, 3, 4} + }); +``` + +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, +and a sparse range on the second. + +```c++ +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->Args({i, j}); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +### Passing Arbitrary Arguments to a Benchmark + +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template +void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { + [...] +} +// Registers a benchmark named "BM_takes_args/int_string_test" that passes +// the specified values to `extra_args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +``` + +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + + + +## Calculating Asymptotic Complexity (Big O) + +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. + +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(state.range(0), '-'); + for (auto _ : state) { + benchmark::DoNotOptimize(s1.compare(s2)); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. + +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. + +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); +``` + + + +## Custom Benchmark Name + +You can change the benchmark's name as follows: + +```c++ +BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); +``` + +The invocation will execute the benchmark as before using `BM_memcpy` but changes +the prefix in the report to `memcpy`. + + + +## Templated Benchmarks + +This example produces and consumes messages of size `sizeof(v)` `range_x` +times. It also outputs throughput in the absence of multiprogramming. + +```c++ +template void BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i--; ) + q.push(v); + for (int e = state.range(0); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range(0)); +} +// C++03 +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); + +// C++11 or newer, you can use the BENCHMARK macro with template parameters: +BENCHMARK(BM_Sequential>)->Range(1<<0, 1<<10); + +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(func<...>) // Takes any number of parameters. +#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + + + +## Fixtures + +Fixture tests are created by first defining a type that derives from +`::benchmark::Fixture` and then creating/registering the tests using the +following macros: + +* `BENCHMARK_F(ClassName, Method)` +* `BENCHMARK_DEFINE_F(ClassName, Method)` +* `BENCHMARK_REGISTER_F(ClassName, Method)` + +For Example: + +```c++ +class MyFixture : public benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& state) { + } + + void TearDown(const ::benchmark::State& state) { + } +}; + +BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} +/* BarTest is NOT registered */ +BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); +/* BarTest is now registered */ +``` + +### Templated Fixtures + +Also you can create templated fixture by using the following macros: + +* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` +* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` + +For example: + +```c++ +template +class MyFixture : public benchmark::Fixture {}; + +BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); +``` + + + +## Custom Counters + +You can add your own counters with user-defined names. The example below +will add columns "Foo", "Bar" and "Baz" in its output: + +```c++ +static void UserCountersExample1(benchmark::State& state) { + double numFoos = 0, numBars = 0, numBazs = 0; + for (auto _ : state) { + // ... count Foo,Bar,Baz events + } + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +} +``` + +The `state.counters` object is a `std::map` with `std::string` keys +and `Counter` values. The latter is a `double`-like class, via an implicit +conversion to `double&`. Thus you can use all of the standard arithmetic +assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. + +In multithreaded benchmarks, each counter is set on the calling thread only. +When the benchmark finishes, the counters from each thread will be summed; +the resulting sum is the value which will be shown for the benchmark. + +The `Counter` constructor accepts three parameters: the value as a `double` +; a bit flag which allows you to show counters as rates, and/or as per-thread +iteration, and/or as per-thread averages, and/or iteration invariants, +and/or finally inverting the result; and a flag specifying the 'unit' - i.e. +is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 +(`benchmark::Counter::OneK::kIs1024`)? + +```c++ + // sets a simple counter + state.counters["Foo"] = numFoos; + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark. + // Meaning: per one second, how many 'foo's are processed? + state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark, and the result inverted. + // Meaning: how many seconds it takes to process one 'foo'? + state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + + // Set the counter as a thread-average quantity. It will + // be presented divided by the number of threads. + state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); + + // There's also a combined flag: + state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); + + // This says that we process with the rate of state.range(0) bytes every iteration: + state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); +``` + +When you're compiling in C++11 mode or later you can use `insert()` with +`std::initializer_list`: + +{% raw %} +```c++ + // With C++11, this can be done: + state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); + // ... instead of: + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +``` +{% endraw %} + +### Counter Reporting + +When using the console reporter, by default, user counters are printed at +the end after the table, the same way as ``bytes_processed`` and +``items_processed``. This is best for cases in which there are few counters, +or where there are only a couple of lines per benchmark. Here's an example of +the default output: + +``` +------------------------------------------------------------------------------ +Benchmark Time CPU Iterations UserCounters... +------------------------------------------------------------------------------ +BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m +BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 +BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 +BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 +BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 +BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 +BM_Factorial 26 ns 26 ns 26608979 40320 +BM_Factorial/real_time 26 ns 26 ns 26587936 40320 +BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 +BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 +BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 +BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 +``` + +If this doesn't suit you, you can print each counter as a table column by +passing the flag `--benchmark_counters_tabular=true` to the benchmark +application. This is best for cases in which there are a lot of counters, or +a lot of lines per individual benchmark. Note that this will trigger a +reprinting of the table header any time the counter set changes between +individual benchmarks. Here's an example of corresponding output when +`--benchmark_counters_tabular=true` is passed: + +``` +--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations Bar Bat Baz Foo +--------------------------------------------------------------------------------------- +BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 +BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 +BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 +BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 +BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 +BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 +BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 +BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 +-------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------- +BM_Factorial 26 ns 26 ns 26392245 40320 +BM_Factorial/real_time 26 ns 26 ns 26494107 40320 +BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 +BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 +BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 +BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 +BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 +BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 +BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 +BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 +BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 +``` + +Note above the additional header printed when the benchmark changes from +``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does +not have the same counter set as ``BM_UserCounter``. + + + +## Multithreaded Benchmarks + +In a multithreaded test (benchmark invoked by multiple threads simultaneously), +it is guaranteed that none of the threads will start until all have reached +the start of the benchmark loop, and all will have finished before any thread +exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` +API) As such, any global setup or teardown can be wrapped in a check against the thread +index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index() == 0) { + // Setup code here. + } + for (auto _ : state) { + // Run the test as normal. + } + if (state.thread_index() == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` + +If the benchmarked code itself uses threads and you want to compare it to +single-threaded code, you may want to use real-time ("wallclock") measurements +for latency comparisons: + +```c++ +BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); +``` + +Without `UseRealTime`, CPU time is used by default. + + + +## CPU Timers + +By default, the CPU timer only measures the time spent by the main thread. +If the benchmark itself uses threads internally, this measurement may not +be what you are looking for. Instead, there is a way to measure the total +CPU usage of the process, by all the threads. + +```c++ +void callee(int i); + +static void MyMain(int size) { +#pragma omp parallel for + for(int i = 0; i < size; i++) + callee(i); +} + +static void BM_OpenMP(benchmark::State& state) { + for (auto _ : state) + MyMain(state.range(0)); +} + +// Measure the time spent by the main thread, use it to decide for how long to +// run the benchmark loop. Depending on the internal implementation detail may +// measure to anywhere from near-zero (the overhead spent before/after work +// handoff to worker thread[s]) to the whole single-thread time. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10); + +// Measure the user-visible time, the wall clock (literally, the time that +// has passed on the clock on the wall), use it to decide for how long to +// run the benchmark loop. This will always be meaningful, an will match the +// time spent by the main thread in single-threaded case, in general decreasing +// with the number of internal threads doing the work. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); + +// Measure the total CPU consumption, use it to decide for how long to +// run the benchmark loop. This will always measure to no less than the +// time spent by the main thread in single-threaded case. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); + +// A mixture of the last two. Measure the total CPU consumption, but use the +// wall clock to decide for how long to run the benchmark loop. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); +``` + +### Controlling Timers + +Normally, the entire duration of the work loop (`for (auto _ : state) {}`) +is measured. But sometimes, it is necessary to do some work inside of +that loop, every iteration, but without counting that time to the benchmark time. +That is possible, although it is not recommended, since it has high overhead. + +{% raw %} +```c++ +static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); // Stop timers. They will not count until they are resumed. + data = ConstructRandomSet(state.range(0)); // Do something that should not be measured + state.ResumeTiming(); // And resume timers. They are now counting again. + // The rest will be measured. + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +{% endraw %} + + + +## Manual Timing + +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the benchmark loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. + +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range(0); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + for (auto _ : state) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + + + +## Setting the Time Unit + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. In +order to manually set the time unit, you can specify it manually: + +```c++ +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +``` + + + +## Preventing Optimization + +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` +functions can be used. + +```c++ +static void BM_test(benchmark::State& state) { + for (auto _ : state) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} +``` + +`DoNotOptimize()` forces the *result* of `` to be stored in either +memory or a register. For GNU based compilers it acts as read/write barrier +for global memory. More specifically it forces the compiler to flush pending +writes to memory and reload any other values as necessary. + +Note that `DoNotOptimize()` does not prevent optimizations on `` +in any way. `` may even be removed entirely when the result is already +known. For example: + +```c++ + /* Example 1: `` is removed entirely. */ + int foo(int x) { return x + 42; } + while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); + + /* Example 2: Result of '' is only reused */ + int bar(int) __attribute__((const)); + while (...) DoNotOptimize(bar(0)); // Optimized to: + // int __result__ = bar(0); + // while (...) DoNotOptimize(__result__); +``` + +The second tool for preventing optimizations is `ClobberMemory()`. In essence +`ClobberMemory()` forces the compiler to perform all pending writes to global +memory. Memory managed by block scope objects must be "escaped" using +`DoNotOptimize(...)` before it can be clobbered. In the below example +`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized +away. + +```c++ +static void BM_vector_push_back(benchmark::State& state) { + for (auto _ : state) { + std::vector v; + v.reserve(1); + benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. + v.push_back(42); + benchmark::ClobberMemory(); // Force 42 to be written to memory. + } +} +``` + +Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. + + + +## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of variation of Repeated Benchmarks + +By default each benchmark is run once and that single result is reported. +However benchmarks are often noisy and a single result may not be representative +of the overall behavior. For this reason it's possible to repeatedly rerun the +benchmark. + +The number of runs of each benchmark is specified globally by the +`--benchmark_repetitions` flag or on a per benchmark basis by calling +`Repetitions` on the registered benchmark object. When a benchmark is run more +than once the mean, median, standard deviation and coefficient of variation +of the runs will be reported. + +Additionally the `--benchmark_report_aggregates_only={true|false}`, +`--benchmark_display_aggregates_only={true|false}` flags or +`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be +used to change how repeated tests are reported. By default the result of each +repeated run is reported. When `report aggregates only` option is `true`, +only the aggregates (i.e. mean, median, standard deviation and coefficient +of variation, maybe complexity measurements if they were requested) of the runs +is reported, to both the reporters - standard output (console), and the file. +However when only the `display aggregates only` option is `true`, +only the aggregates are displayed in the standard output, while the file +output still contains everything. +Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a +registered benchmark object overrides the value of the appropriate flag for that +benchmark. + + + +## Custom Statistics + +While having these aggregates is nice, this may not be enough for everyone. +For example you may want to know what the largest observation is, e.g. because +you have some real-time constraints. This is easy. The following code will +specify a custom statistic to be calculated, defined by a lambda function. + +```c++ +void BM_spin_empty(benchmark::State& state) { + for (auto _ : state) { + for (int x = 0; x < state.range(0); ++x) { + benchmark::DoNotOptimize(x); + } + } +} + +BENCHMARK(BM_spin_empty) + ->ComputeStatistics("max", [](const std::vector& v) -> double { + return *(std::max_element(std::begin(v), std::end(v))); + }) + ->Arg(512); +``` + +While usually the statistics produce values in time units, +you can also produce percentages: + +```c++ +void BM_spin_empty(benchmark::State& state) { + for (auto _ : state) { + for (int x = 0; x < state.range(0); ++x) { + benchmark::DoNotOptimize(x); + } + } +} + +BENCHMARK(BM_spin_empty) + ->ComputeStatistics("ratio", [](const std::vector& v) -> double { + return std::begin(v) / std::end(v); + }, benchmark::StatisticUnit::Percentage) + ->Arg(512); +``` + + + +## Using RegisterBenchmark(name, fn, args...) + +The `RegisterBenchmark(name, func, args...)` function provides an alternative +way to create and register benchmarks. +`RegisterBenchmark(name, func, args...)` creates, registers, and returns a +pointer to a new benchmark with the specified `name` that invokes +`func(st, args...)` where `st` is a `benchmark::State` object. + +Unlike the `BENCHMARK` registration macros, which can only be used at the global +scope, the `RegisterBenchmark` can be called anywhere. This allows for +benchmark tests to be registered programmatically. + +Additionally `RegisterBenchmark` allows any callable object to be registered +as a benchmark. Including capturing lambdas and function objects. + +For Example: +```c++ +auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; + +int main(int argc, char** argv) { + for (auto& test_input : { /* ... */ }) + benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} +``` + + + +## Exiting with an Error + +When errors caused by external influences, such as file I/O and network +communication, occur within a benchmark the +`State::SkipWithError(const char* msg)` function can be used to skip that run +of benchmark and report the error. Note that only future iterations of the +`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop +Users must explicitly exit the loop, otherwise all iterations will be performed. +Users may explicitly return to exit the benchmark immediately. + +The `SkipWithError(...)` function may be used at any point within the benchmark, +including before and after the benchmark loop. Moreover, if `SkipWithError(...)` +has been used, it is not required to reach the benchmark loop and one may return +from the benchmark function early. + +For example: + +```c++ +static void BM_test(benchmark::State& state) { + auto resource = GetResource(); + if (!resource.good()) { + state.SkipWithError("Resource is not good!"); + // KeepRunning() loop will not be entered. + } + while (state.KeepRunning()) { + auto data = resource.read_data(); + if (!resource.good()) { + state.SkipWithError("Failed to read data!"); + break; // Needed to skip the rest of the iteration. + } + do_stuff(data); + } +} + +static void BM_test_ranged_fo(benchmark::State & state) { + auto resource = GetResource(); + if (!resource.good()) { + state.SkipWithError("Resource is not good!"); + return; // Early return is allowed when SkipWithError() has been used. + } + for (auto _ : state) { + auto data = resource.read_data(); + if (!resource.good()) { + state.SkipWithError("Failed to read data!"); + break; // REQUIRED to prevent all further iterations. + } + do_stuff(data); + } +} +``` + + +## A Faster KeepRunning Loop + +In C++11 mode, a ranged-based for loop should be used in preference to +the `KeepRunning` loop for running the benchmarks. For example: + +```c++ +static void BM_Fast(benchmark::State &state) { + for (auto _ : state) { + FastOperation(); + } +} +BENCHMARK(BM_Fast); +``` + +The reason the ranged-for loop is faster than using `KeepRunning`, is +because `KeepRunning` requires a memory load and store of the iteration count +ever iteration, whereas the ranged-for variant is able to keep the iteration count +in a register. + +For example, an empty inner loop of using the ranged-based for method looks like: + +```asm +# Loop Init + mov rbx, qword ptr [r14 + 104] + call benchmark::State::StartKeepRunning() + test rbx, rbx + je .LoopEnd +.LoopHeader: # =>This Inner Loop Header: Depth=1 + add rbx, -1 + jne .LoopHeader +.LoopEnd: +``` + +Compared to an empty `KeepRunning` loop, which looks like: + +```asm +.LoopHeader: # in Loop: Header=BB0_3 Depth=1 + cmp byte ptr [rbx], 1 + jne .LoopInit +.LoopBody: # =>This Inner Loop Header: Depth=1 + mov rax, qword ptr [rbx + 8] + lea rcx, [rax + 1] + mov qword ptr [rbx + 8], rcx + cmp rax, qword ptr [rbx + 104] + jb .LoopHeader + jmp .LoopEnd +.LoopInit: + mov rdi, rbx + call benchmark::State::StartKeepRunning() + jmp .LoopBody +.LoopEnd: +``` + +Unless C++03 compatibility is required, the ranged-for variant of writing +the benchmark loop should be preferred. + + + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the benchmark: + +```bash +sudo cpupower frequency-set --governor performance +./mybench +sudo cpupower frequency-set --governor powersave +``` diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 01f1262..c8ced38 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -34,7 +34,7 @@ static void BM_StringCopy(benchmark::State& state) { BENCHMARK(BM_StringCopy); // Augment the main() program to invoke benchmarks if specified -// via the --benchmarks command line flag. E.g., +// via the --benchmark_filter command line flag. E.g., // my_unittest --benchmark_filter=all // my_unittest --benchmark_filter=BM_StringCreation // my_unittest --benchmark_filter=String @@ -42,6 +42,7 @@ BENCHMARK(BM_StringCopy); int main(int argc, char** argv) { benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); return 0; } @@ -139,13 +140,13 @@ thread exits the loop body. As such, any global setup or teardown you want to do can be wrapped in a check against the thread index: static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Setup code here. } for (auto _ : state) { // Run the test as normal. } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Teardown code here. } } @@ -167,12 +168,19 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_HAS_CXX11 #endif +// This _MSC_VER check should detect VS 2017 v15.3 and newer. +#if __cplusplus >= 201703L || \ + (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) +#define BENCHMARK_HAS_CXX17 +#endif + #include #include #include #include #include +#include #include #include #include @@ -180,6 +188,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #include #if defined(BENCHMARK_HAS_CXX11) +#include #include #include #include @@ -199,13 +208,19 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); TypeName& operator=(const TypeName&) = delete #endif -#if defined(__GNUC__) +#ifdef BENCHMARK_HAS_CXX17 +#define BENCHMARK_UNUSED [[maybe_unused]] +#elif defined(__GNUC__) || defined(__clang__) #define BENCHMARK_UNUSED __attribute__((unused)) +#else +#define BENCHMARK_UNUSED +#endif + +#if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) #define BENCHMARK_NOEXCEPT noexcept #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #elif defined(_MSC_VER) && !defined(__clang__) -#define BENCHMARK_UNUSED #define BENCHMARK_ALWAYS_INLINE __forceinline #if _MSC_VER >= 1900 #define BENCHMARK_NOEXCEPT noexcept @@ -216,7 +231,6 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #endif #define __func__ __FUNCTION__ #else -#define BENCHMARK_UNUSED #define BENCHMARK_ALWAYS_INLINE #define BENCHMARK_NOEXCEPT #define BENCHMARK_NOEXCEPT_OP(x) @@ -225,16 +239,24 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_INTERNAL_TOSTRING2(x) #x #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) +// clang-format off #if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop") #else #define BENCHMARK_BUILTIN_EXPECT(x, y) x #define BENCHMARK_DEPRECATED_MSG(msg) #define BENCHMARK_WARNING_MSG(msg) \ __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ __LINE__) ") : warning note: " msg)) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING +#define BENCHMARK_RESTORE_DEPRECATED_WARNING #endif +// clang-format on #if defined(__GNUC__) && !defined(__clang__) #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) @@ -252,21 +274,34 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_UNREACHABLE() ((void)0) #endif +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_OVERRIDE override +#else +#define BENCHMARK_OVERRIDE +#endif + namespace benchmark { class BenchmarkReporter; -class MemoryManager; void Initialize(int* argc, char** argv); +void Shutdown(); // Report to stdout all arguments in 'argv' as unrecognized except the first. // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). bool ReportUnrecognizedArguments(int argc, char** argv); +// Returns the current value of --benchmark_filter. +std::string GetBenchmarkFilter(); + // Generate a list of benchmarks matching the specified --benchmark_filter flag // and if --benchmark_list_tests is specified return after printing the name // of each matching benchmark. Otherwise run each matching benchmark and // report the results. // +// spec : Specify the benchmarks to run. If users do not specify this arg, +// then the value of FLAGS_benchmark_filter +// will be used. +// // The second and third overload use the specified 'display_reporter' and // 'file_reporter' respectively. 'file_reporter' will write to the file // specified @@ -275,14 +310,70 @@ bool ReportUnrecognizedArguments(int argc, char** argv); // // RETURNS: The number of matching benchmarks. size_t RunSpecifiedBenchmarks(); +size_t RunSpecifiedBenchmarks(std::string spec); + size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + std::string spec); + size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, + std::string spec); + +// If a MemoryManager is registered (via RegisterMemoryManager()), +// it can be used to collect and report allocation metrics for a run of the +// benchmark. +class MemoryManager { + public: + static const int64_t TombstoneValue; + + struct Result { + Result() + : num_allocs(0), + max_bytes_used(0), + total_allocated_bytes(TombstoneValue), + net_heap_growth(TombstoneValue) {} + + // The number of allocations made in total between Start and Stop. + int64_t num_allocs; + + // The peak memory use between Start and Stop. + int64_t max_bytes_used; + + // The total memory allocated, in bytes, between Start and Stop. + // Init'ed to TombstoneValue if metric not available. + int64_t total_allocated_bytes; + + // The net changes in memory, in bytes, between Start and Stop. + // ie., total_allocated_bytes - total_deallocated_bytes. + // Init'ed to TombstoneValue if metric not available. + int64_t net_heap_growth; + }; + + virtual ~MemoryManager() {} + + // Implement this to start recording allocation information. + virtual void Start() = 0; + + // Implement this to stop recording and fill out the given Result structure. + BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead") + virtual void Stop(Result* result) = 0; + + // FIXME(vyng): Make this pure virtual once we've migrated current users. + BENCHMARK_DISABLE_DEPRECATED_WARNING + virtual void Stop(Result& result) { Stop(&result); } + BENCHMARK_RESTORE_DEPRECATED_WARNING +}; // Register a MemoryManager instance that will be used to collect and report // allocation measurements for benchmark runs. void RegisterMemoryManager(MemoryManager* memory_manager); +// Add a key-value pair to output as part of the context stanza in the report. +void AddCustomContext(const std::string& key, const std::string& value); + namespace internal { class Benchmark; class BenchmarkImp; @@ -305,6 +396,14 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY #endif +// Force the compiler to flush pending writes to global memory. Acts as an +// effective read/write barrier +#ifdef BENCHMARK_HAS_CXX11 +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + std::atomic_signal_fence(std::memory_order_acq_rel); +} +#endif + // The DoNotOptimize(...) function can be used to prevent a value or // expression from being optimized away by the compiler. This function is // intended to add little to no overhead. @@ -324,11 +423,11 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { #endif } -// Force the compiler to flush pending writes to global memory. Acts as an -// effective read/write barrier +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { asm volatile("" : : : "memory"); } +#endif #elif defined(_MSC_VER) template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { @@ -336,13 +435,15 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { _ReadWriteBarrier(); } +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } +#endif #else template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast(value)); } -// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers +// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11. #endif // This class is used for user-defined counters. @@ -352,27 +453,27 @@ class Counter { kDefaults = 0, // Mark the counter as a rate. It will be presented divided // by the duration of the benchmark. - kIsRate = 1U << 0U, + kIsRate = 1 << 0, // Mark the counter as a thread-average quantity. It will be // presented divided by the number of threads. - kAvgThreads = 1U << 1U, + kAvgThreads = 1 << 1, // Mark the counter as a thread-average rate. See above. kAvgThreadsRate = kIsRate | kAvgThreads, // Mark the counter as a constant value, valid/same for *every* iteration. // When reporting, it will be *multiplied* by the iteration count. - kIsIterationInvariant = 1U << 2U, + kIsIterationInvariant = 1 << 2, // Mark the counter as a constant rate. // When reporting, it will be *multiplied* by the iteration count // and then divided by the duration of the benchmark. kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, // Mark the counter as a iteration-average quantity. // It will be presented divided by the number of iterations. - kAvgIterations = 1U << 3U, + kAvgIterations = 1 << 3, // Mark the counter as a iteration-average rate. See above. kAvgIterationsRate = kIsRate | kAvgIterations, // In the end, invert the result. This is always done last! - kInvert = 1U << 31U + kInvert = 1 << 31 }; enum OneK { @@ -390,7 +491,7 @@ class Counter { Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) : value(v), flags(f), oneK(k) {} - BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } + BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; } BENCHMARK_ALWAYS_INLINE operator double&() { return value; } }; @@ -407,7 +508,7 @@ typedef std::map UserCounters; // TimeUnit is passed to a benchmark in order to specify the order of magnitude // for the measured time. -enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond }; +enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; // BigO is passed to a benchmark in order to specify the asymptotic // computational @@ -417,6 +518,8 @@ enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; typedef uint64_t IterationCount; +enum StatisticUnit { kTime, kPercentage }; + // BigOFunc is passed to a benchmark in order to specify the asymptotic // computational complexity for the benchmark. typedef double(BigOFunc)(IterationCount); @@ -429,14 +532,17 @@ namespace internal { struct Statistics { std::string name_; StatisticsFunc* compute_; + StatisticUnit unit_; - Statistics(const std::string& name, StatisticsFunc* compute) - : name_(name), compute_(compute) {} + Statistics(const std::string& name, StatisticsFunc* compute, + StatisticUnit unit = kTime) + : name_(name), compute_(compute), unit_(unit) {} }; -struct BenchmarkInstance; +class BenchmarkInstance; class ThreadTimer; class ThreadManager; +class PerfCountersMeasurement; enum AggregationReportMode #if defined(BENCHMARK_HAS_CXX11) @@ -633,6 +739,14 @@ class State { BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") int64_t range_y() const { return range(1); } + // Number of threads concurrently executing the benchmark. + BENCHMARK_ALWAYS_INLINE + int threads() const { return threads_; } + + // Index of the executing thread. Values from [0, threads). + BENCHMARK_ALWAYS_INLINE + int thread_index() const { return thread_index_; } + BENCHMARK_ALWAYS_INLINE IterationCount iterations() const { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { @@ -641,8 +755,8 @@ class State { return max_iterations - total_iterations_ + batch_leftover_; } - private - : // items we expect on the first cache line (ie 64 bytes of the struct) + private: + // items we expect on the first cache line (ie 64 bytes of the struct) // When total_iterations_ is 0, KeepRunning() and friends will return false. // May be larger than max_iterations. IterationCount total_iterations_; @@ -660,7 +774,7 @@ class State { bool finished_; bool error_occurred_; - private: // items we don't need on the first cache line + // items we don't need on the first cache line std::vector range_; int64_t complexity_n_; @@ -668,25 +782,27 @@ class State { public: // Container for user-defined counters. UserCounters counters; - // Index of the executing thread. Values from [0, threads). - const int thread_index; - // Number of threads concurrently executing the benchmark. - const int threads; private: State(IterationCount max_iters, const std::vector& ranges, int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager); + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement); void StartKeepRunning(); // Implementation of KeepRunning() and KeepRunningBatch(). // is_batch must be true unless n is 1. bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); - internal::ThreadTimer* timer_; - internal::ThreadManager* manager_; - friend struct internal::BenchmarkInstance; + const int thread_index_; + const int threads_; + + internal::ThreadTimer* const timer_; + internal::ThreadManager* const manager_; + internal::PerfCountersMeasurement* const perf_counters_measurement_; + + friend class internal::BenchmarkInstance; }; inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { @@ -790,6 +906,9 @@ class Benchmark { // Note: the following methods all return "this" so that multiple // method calls can be chained together in one expression. + // Specify the name of the benchmark + Benchmark* Name(const std::string& name); + // Run this benchmark once with "x" as the extra argument passed // to the function. // REQUIRES: The function passed to the constructor must accept an arg1. @@ -850,6 +969,23 @@ class Benchmark { return Ranges(ranges); } + // Have "setup" and/or "teardown" invoked once for every benchmark run. + // If the benchmark is multi-threaded (will run in k threads concurrently), + // the setup callback will be be invoked exactly once (not k times) before + // each run with k threads. Time allowing (e.g. for a short benchmark), there + // may be multiple such runs per benchmark, each run with its own + // "setup"/"teardown". + // + // If the benchmark uses different size groups of threads (e.g. via + // ThreadRange), the above will be true for each size group. + // + // The callback will be passed a State object, which includes the number + // of threads, thread-index, benchmark arguments, etc. + // + // The callback must not be NULL or self-deleting. + Benchmark* Setup(void (*setup)(const benchmark::State&)); + Benchmark* Teardown(void (*teardown)(const benchmark::State&)); + // Pass this benchmark object to *func, which can customize // the benchmark by calling various methods like Arg, Args, // Threads, etc. @@ -918,7 +1054,9 @@ class Benchmark { Benchmark* Complexity(BigOFunc* complexity); // Add this statistics to be computed over all the values of benchmark run - Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); + Benchmark* ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit = kTime); // Support for running multiple copies of the same benchmark concurrently // in multiple threads. This may be useful when measuring the scaling @@ -961,6 +1099,7 @@ class Benchmark { private: friend class BenchmarkFamilies; + friend class BenchmarkInstance; std::string name_; AggregationReportMode aggregation_report_mode_; @@ -979,6 +1118,10 @@ class Benchmark { std::vector statistics_; std::vector thread_counts_; + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_; + callback_function teardown_; + Benchmark& operator=(Benchmark const&); }; @@ -1008,7 +1151,7 @@ class FunctionBenchmark : public Benchmark { FunctionBenchmark(const char* name, Function* func) : Benchmark(name), func_(func) {} - virtual void Run(State& st); + virtual void Run(State& st) BENCHMARK_OVERRIDE; private: Function* func_; @@ -1018,7 +1161,7 @@ class FunctionBenchmark : public Benchmark { template class LambdaBenchmark : public Benchmark { public: - virtual void Run(State& st) { lambda_(st); } + virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } private: template @@ -1027,8 +1170,7 @@ class LambdaBenchmark : public Benchmark { LambdaBenchmark(LambdaBenchmark const&) = delete; - private: - template + template // NOLINTNEXTLINE(readability-redundant-declaration) friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); Lambda lambda_; @@ -1070,7 +1212,7 @@ class Fixture : public internal::Benchmark { public: Fixture() : internal::Benchmark("") {} - virtual void Run(State& st) { + virtual void Run(State& st) BENCHMARK_OVERRIDE { this->SetUp(st); this->BenchmarkCase(st); this->TearDown(st); @@ -1102,19 +1244,37 @@ class Fixture : public internal::Benchmark { #endif // Helpers for generating unique variable names +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_PRIVATE_NAME(...) \ + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \ + __VA_ARGS__) +#else #define BENCHMARK_PRIVATE_NAME(n) \ - BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#endif // BENCHMARK_HAS_CXX11 + #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c +// Helper for concatenation with macro name expansion +#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ + BaseClass##_##Method##_Benchmark #define BENCHMARK_PRIVATE_DECLARE(n) \ static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ BENCHMARK_UNUSED +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(...) \ + BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \ + &__VA_ARGS__))) +#else #define BENCHMARK(n) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark(#n, n))) +#endif // BENCHMARK_HAS_CXX11 // Old-style macros #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) @@ -1175,49 +1335,49 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "<" #a ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ +#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a ">/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ +#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #ifdef BENCHMARK_HAS_CXX11 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \ class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #else #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ @@ -1226,27 +1386,27 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_DEFINE_F(BaseClass, Method) \ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #ifdef BENCHMARK_HAS_CXX11 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #else #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) #endif #define BENCHMARK_REGISTER_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) + BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)) #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ BENCHMARK_PRIVATE_DECLARE(TestName) = \ @@ -1256,23 +1416,23 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_F(BaseClass, Method) \ BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \ BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #ifdef BENCHMARK_HAS_CXX11 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \ BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase #else #define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \ BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) @@ -1284,6 +1444,8 @@ class Fixture : public internal::Benchmark { ::benchmark::Initialize(&argc, argv); \ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ + ::benchmark::Shutdown(); \ + return 0; \ } \ int main(int, char**) @@ -1300,16 +1462,12 @@ struct CPUInfo { int num_sharing; }; - enum Scaling { - UNKNOWN, - ENABLED, - DISABLED - }; + enum Scaling { UNKNOWN, ENABLED, DISABLED }; int num_cpus; + Scaling scaling; double cycles_per_second; std::vector caches; - Scaling scaling; std::vector load_avg; static const CPUInfo& Get(); @@ -1368,6 +1526,7 @@ class BenchmarkReporter { Run() : run_type(RT_Iteration), + aggregate_unit(kTime), error_occurred(false), iterations(1), threads(1), @@ -1380,15 +1539,16 @@ class BenchmarkReporter { complexity_n(0), report_big_o(false), report_rms(false), - counters(), - has_memory_result(false), - allocs_per_iter(0.0), - max_bytes_used(0) {} + memory_result(NULL), + allocs_per_iter(0.0) {} std::string benchmark_name() const; BenchmarkName run_name; + int64_t family_index; + int64_t per_family_instance_index; RunType run_type; std::string aggregate_name; + StatisticUnit aggregate_unit; std::string report_label; // Empty if not set by benchmark. bool error_occurred; std::string error_message; @@ -1431,9 +1591,21 @@ class BenchmarkReporter { UserCounters counters; // Memory metrics. - bool has_memory_result; + const MemoryManager::Result* memory_result; double allocs_per_iter; - int64_t max_bytes_used; + }; + + struct PerFamilyRunReports { + PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} + + // How many runs will all instances of this benchmark perform? + int num_runs_total; + + // How many runs have happened already? + int num_runs_done; + + // The reports about (non-errneous!) runs of this family. + std::vector Runs; }; // Construct a BenchmarkReporter with the output stream set to 'std::cout' @@ -1503,13 +1675,10 @@ class ConsoleReporter : public BenchmarkReporter { OO_Defaults = OO_ColorTabular }; explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) - : output_options_(opts_), - name_field_width_(0), - prev_counters_(), - printed_header_(false) {} + : output_options_(opts_), name_field_width_(0), printed_header_(false) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector& reports); + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; protected: virtual void PrintRunData(const Run& report); @@ -1524,9 +1693,9 @@ class ConsoleReporter : public BenchmarkReporter { class JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector& reports); - virtual void Finalize(); + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + virtual void Finalize() BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1539,8 +1708,8 @@ class BENCHMARK_DEPRECATED_MSG( : public BenchmarkReporter { public: CSVReporter() : printed_header_(false) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector& reports); + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1549,31 +1718,10 @@ class BENCHMARK_DEPRECATED_MSG( std::set user_counter_names_; }; -// If a MemoryManager is registered, it can be used to collect and report -// allocation metrics for a run of the benchmark. -class MemoryManager { - public: - struct Result { - Result() : num_allocs(0), max_bytes_used(0) {} - - // The number of allocations made in total between Start and Stop. - int64_t num_allocs; - - // The peak memory use between Start and Stop. - int64_t max_bytes_used; - }; - - virtual ~MemoryManager() {} - - // Implement this to start recording allocation information. - virtual void Start() = 0; - - // Implement this to stop recording and fill out the given Result structure. - virtual void Stop(Result* result) = 0; -}; - inline const char* GetTimeUnitString(TimeUnit unit) { switch (unit) { + case kSecond: + return "s"; case kMillisecond: return "ms"; case kMicrosecond: @@ -1586,6 +1734,8 @@ inline const char* GetTimeUnitString(TimeUnit unit) { inline double GetTimeUnitMultiplier(TimeUnit unit) { switch (unit) { + case kSecond: + return 1; case kMillisecond: return 1e3; case kMicrosecond: @@ -1596,6 +1746,20 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) { BENCHMARK_UNREACHABLE(); } +// Creates a list of integer values for the given range and multiplier. +// This can be used together with ArgsProduct() to allow multiple ranges +// with different multiplers. +// Example: +// ArgsProduct({ +// CreateRange(0, 1024, /*multi=*/32), +// CreateRange(0, 100, /*multi=*/4), +// CreateDenseRange(0, 4, /*step=*/1), +// }); +std::vector CreateRange(int64_t lo, int64_t hi, int multi); + +// Creates a list of integer values for the given range and step. +std::vector CreateDenseRange(int64_t start, int64_t limit, int step); + } // namespace benchmark #endif // BENCHMARK_BENCHMARK_H_ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e451894 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +numpy == 1.19.4 +scipy == 1.5.4 +pandas == 1.1.5 diff --git a/setup.py b/setup.py index 5cdab10..4eaccf8 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os import posixpath +import platform import re import shutil import sys @@ -89,6 +90,8 @@ class BuildBazelExtension(build_ext.build_ext): # Link with python*.lib. for library_dir in self.library_dirs: bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif sys.platform == "darwin" and platform.machine() == "x86_64": + bazel_argv.append("--macos_minimum_os=10.9") self.spawn(bazel_argv) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 35d559e..e814a4e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,32 +25,32 @@ set_target_properties(benchmark PROPERTIES SOVERSION ${GENERIC_LIB_SOVERSION} ) target_include_directories(benchmark PUBLIC - $ - ) + $) + +# libpfm, if available +if (HAVE_LIBPFM) + target_link_libraries(benchmark PRIVATE pfm) + add_definitions(-DHAVE_LIBPFM) +endif() # Link threads. -target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -find_library(LIBRT rt) -if(LIBRT) - target_link_libraries(benchmark ${LIBRT}) -endif() +target_link_libraries(benchmark PRIVATE Threads::Threads) + +target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES}) + +if(HAVE_LIB_RT) + target_link_libraries(benchmark PRIVATE rt) +endif(HAVE_LIB_RT) -if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) -endif() -if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*") - message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.") - target_link_libraries(benchmark -pthread) -endif() # We need extra libraries on Windows if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") - target_link_libraries(benchmark shlwapi) + target_link_libraries(benchmark PRIVATE shlwapi) endif() # We need extra libraries on Solaris if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") - target_link_libraries(benchmark kstat) + target_link_libraries(benchmark PRIVATE kstat) endif() # Benchmark main library @@ -61,33 +61,44 @@ set_target_properties(benchmark_main PROPERTIES VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} ) -target_include_directories(benchmark PUBLIC - $ - ) -target_link_libraries(benchmark_main benchmark::benchmark) +target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") +set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") set(namespace "${PROJECT_NAME}::") include(CMakePackageConfigHelpers) + +configure_package_config_file ( + ${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in + ${project_config} + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) write_basic_package_version_file( "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion ) -configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) +export ( + TARGETS ${targets_to_export} + NAMESPACE "${namespace}" + FILE ${generated_dir}/${targets_export_name}.cmake +) + if (BENCHMARK_ENABLE_INSTALL) # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) install( - TARGETS benchmark benchmark_main + TARGETS ${targets_to_export} EXPORT ${targets_export_name} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -112,3 +123,37 @@ if (BENCHMARK_ENABLE_INSTALL) NAMESPACE "${namespace}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") endif() + +if (BENCHMARK_ENABLE_DOXYGEN) + find_package(Doxygen REQUIRED) + set(DOXYGEN_QUIET YES) + set(DOXYGEN_RECURSIVE YES) + set(DOXYGEN_GENERATE_HTML YES) + set(DOXYGEN_GENERATE_MAN NO) + set(DOXYGEN_MARKDOWN_SUPPORT YES) + set(DOXYGEN_BUILTIN_STL_SUPPORT YES) + set(DOXYGEN_EXTRACT_PACKAGE YES) + set(DOXYGEN_EXTRACT_STATIC YES) + set(DOXYGEN_SHOW_INCLUDE_FILES YES) + set(DOXYGEN_BINARY_TOC YES) + set(DOXYGEN_TOC_EXPAND YES) + set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md") + doxygen_add_docs(benchmark_doxygen + docs + include + src + ALL + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Building documentation with Doxygen.") + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +else() + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${PROJECT_SOURCE_DIR}/docs/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +endif() diff --git a/src/benchmark.cc b/src/benchmark.cc index 1c049f2..cedeee3 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "benchmark/benchmark.h" + #include "benchmark_api_internal.h" #include "benchmark_runner.h" #include "internal_macros.h" @@ -32,7 +33,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -45,71 +49,85 @@ #include "internal_macros.h" #include "log.h" #include "mutex.h" +#include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" #include "thread_manager.h" #include "thread_timer.h" +namespace benchmark { // Print a list of benchmarks. This option overrides all other options. -DEFINE_bool(benchmark_list_tests, false); +BM_DEFINE_bool(benchmark_list_tests, false); // A regular expression that specifies the set of benchmarks to execute. If // this flag is empty, or if this flag is the string \"all\", all benchmarks // linked into the binary are run. -DEFINE_string(benchmark_filter, "."); +BM_DEFINE_string(benchmark_filter, ""); // Minimum number of seconds we should run benchmark before results are // considered significant. For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); +BM_DEFINE_double(benchmark_min_time, 0.5); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); +BM_DEFINE_int32(benchmark_repetitions, 1); + +// If set, enable random interleaving of repetitions of all benchmarks. +// See http://github.com/google/benchmark/issues/1051 for details. +BM_DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for // repeated benchmarks. Affects all reporters. -DEFINE_bool(benchmark_report_aggregates_only, false); +BM_DEFINE_bool(benchmark_report_aggregates_only, false); // Display the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are displayed for // repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects // the display reporter, but *NOT* file reporter, which will still contain // all the output. -DEFINE_bool(benchmark_display_aggregates_only, false); +BM_DEFINE_bool(benchmark_display_aggregates_only, false); // The format to use for console output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_format, "console"); +BM_DEFINE_string(benchmark_format, "console"); // The format to use for file output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_out_format, "json"); +BM_DEFINE_string(benchmark_out_format, "json"); // The file to write additional output to. -DEFINE_string(benchmark_out, ""); +BM_DEFINE_string(benchmark_out, ""); // Whether to use colors in the output. Valid values: // 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if // the output is being sent to a terminal and the TERM environment variable is // set to a terminal type that supports colors. -DEFINE_string(benchmark_color, "auto"); +BM_DEFINE_string(benchmark_color, "auto"); // Whether to use tabular format when printing user counters to the console. // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. -DEFINE_bool(benchmark_counters_tabular, false); +BM_DEFINE_bool(benchmark_counters_tabular, false); + +// List of additional perf counters to collect, in libpfm format. For more +// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html +BM_DEFINE_string(benchmark_perf_counters, ""); + +// Extra context to include in the output formatted as comma-separated key-value +// pairs. Kept internal as it's only used for parsing from env/command line. +BM_DEFINE_kvpairs(benchmark_context, {}); // The level of verbose logging to output -DEFINE_int32(v, 0); - -namespace benchmark { +BM_DEFINE_int32(v, 0); namespace internal { +std::map* global_context = nullptr; + // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} @@ -117,7 +135,8 @@ void UseCharPointer(char const volatile*) {} State::State(IterationCount max_iters, const std::vector& ranges, int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager) + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) : total_iterations_(0), batch_leftover_(0), max_iterations(max_iters), @@ -126,13 +145,14 @@ State::State(IterationCount max_iters, const std::vector& ranges, error_occurred_(false), range_(ranges), complexity_n_(0), - counters(), - thread_index(thread_i), - threads(n_threads), + thread_index_(thread_i), + threads_(n_threads), timer_(timer), - manager_(manager) { - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + manager_(manager), + perf_counters_measurement_(perf_counters_measurement) { + BM_CHECK(max_iterations != 0) << "At least one iteration must be run"; + BM_CHECK_LT(thread_index_, threads_) + << "thread_index must be less than threads"; // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. However, all compilers @@ -161,17 +181,29 @@ State::State(IterationCount max_iters, const std::vector& ranges, void State::PauseTiming() { // Add in time accumulated so far - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !error_occurred_); timer_->StopTimer(); + if (perf_counters_measurement_) { + auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); + for (const auto& name_and_measurement : measurements) { + auto name = name_and_measurement.first; + auto measurement = name_and_measurement.second; + BM_CHECK_EQ(counters[name], 0.0); + counters[name] = Counter(measurement, Counter::kAvgIterations); + } + } } void State::ResumeTiming() { - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !error_occurred_); timer_->StartTimer(); + if (perf_counters_measurement_) { + perf_counters_measurement_->Start(); + } } void State::SkipWithError(const char* msg) { - CHECK(msg); + BM_CHECK(msg); error_occurred_ = true; { MutexLock l(manager_->GetBenchmarkMutex()); @@ -194,7 +226,7 @@ void State::SetLabel(const char* label) { } void State::StartKeepRunning() { - CHECK(!started_ && !finished_); + BM_CHECK(!started_ && !finished_); started_ = true; total_iterations_ = error_occurred_ ? 0 : max_iterations; manager_->StartStopBarrier(); @@ -202,7 +234,7 @@ void State::StartKeepRunning() { } void State::FinishKeepRunning() { - CHECK(started_ && (!finished_ || error_occurred_)); + BM_CHECK(started_ && (!finished_ || error_occurred_)); if (!error_occurred_) { PauseTiming(); } @@ -215,11 +247,42 @@ void State::FinishKeepRunning() { namespace internal { namespace { +// Flushes streams after invoking reporter methods that write to them. This +// ensures users get timely updates even when streams are not line-buffered. +void FlushStreams(BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); +} + +// Reports in both display and file reporters. +void Report(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + FlushStreams(display_reporter); + FlushStreams(file_reporter); +} + void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { // Note the file_reporter can be null. - CHECK(display_reporter != nullptr); + BM_CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; @@ -227,10 +290,10 @@ void RunBenchmarks(const std::vector& benchmarks, size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { name_field_width = - std::max(name_field_width, benchmark.name.str().size()); - might_have_aggregates |= benchmark.repetitions > 1; + std::max(name_field_width, benchmark.name().str().size()); + might_have_aggregates |= benchmark.repetitions() > 1; - for (const auto& Stat : *benchmark.statistics) + for (const auto& Stat : benchmark.statistics()) stat_field_width = std::max(stat_field_width, Stat.name_.size()); } if (might_have_aggregates) name_field_width += 1 + stat_field_width; @@ -239,56 +302,82 @@ void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter::Context context; context.name_field_width = name_field_width; - // Keep track of running times of all instances of current benchmark - std::vector complexity_reports; - - // We flush streams after invoking reporter methods that write to them. This - // ensures users get timely updates even when streams are not line-buffered. - auto flushStreams = [](BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); - }; + // Keep track of running times of all instances of each benchmark family. + std::map + per_family_reports; if (display_reporter->ReportContext(context) && (!file_reporter || file_reporter->ReportContext(context))) { - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); - for (const auto& benchmark : benchmarks) { - RunResults run_results = RunBenchmark(benchmark, &complexity_reports); + size_t num_repetitions_total = 0; - auto report = [&run_results](BenchmarkReporter* reporter, - bool report_aggregates_only) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; + std::vector runners; + runners.reserve(benchmarks.size()); + for (const BenchmarkInstance& benchmark : benchmarks) { + BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; + if (benchmark.complexity() != oNone) + reports_for_family = &per_family_reports[benchmark.family_index()]; - report(display_reporter, run_results.display_report_aggregates_only); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only); + runners.emplace_back(benchmark, reports_for_family); + int num_repeats_of_this_instance = runners.back().GetNumRepeats(); + num_repetitions_total += num_repeats_of_this_instance; + if (reports_for_family) + reports_for_family->num_runs_total += num_repeats_of_this_instance; + } + assert(runners.size() == benchmarks.size() && "Unexpected runner count."); - flushStreams(display_reporter); - flushStreams(file_reporter); + std::vector repetition_indices; + repetition_indices.reserve(num_repetitions_total); + for (size_t runner_index = 0, num_runners = runners.size(); + runner_index != num_runners; ++runner_index) { + const internal::BenchmarkRunner& runner = runners[runner_index]; + std::fill_n(std::back_inserter(repetition_indices), + runner.GetNumRepeats(), runner_index); + } + assert(repetition_indices.size() == num_repetitions_total && + "Unexpected number of repetition indexes."); + + if (FLAGS_benchmark_enable_random_interleaving) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); + } + + for (size_t repetition_index : repetition_indices) { + internal::BenchmarkRunner& runner = runners[repetition_index]; + runner.DoOneRepetition(); + if (runner.HasRepeatsRemaining()) continue; + // FIXME: report each repetition separately, not all of them in bulk. + + RunResults run_results = runner.GetResults(); + + // Maybe calculate complexity report + if (const auto* reports_for_family = runner.GetReportsForFamily()) { + if (reports_for_family->num_runs_done == + reports_for_family->num_runs_total) { + auto additional_run_stats = ComputeBigO(reports_for_family->Runs); + run_results.aggregates_only.insert(run_results.aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); + per_family_reports.erase( + static_cast(reports_for_family->Runs.front().family_index)); + } + } + + Report(display_reporter, file_reporter, run_results); } } display_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING std::unique_ptr CreateReporter( std::string const& name, ConsoleReporter::OutputOptions output_opts) { @@ -305,9 +394,7 @@ std::unique_ptr CreateReporter( } } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING } // end namespace @@ -342,16 +429,32 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { } // end namespace internal size_t RunSpecifiedBenchmarks() { - return RunSpecifiedBenchmarks(nullptr, nullptr); + return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(std::string spec) { + return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { - return RunSpecifiedBenchmarks(display_reporter, nullptr); + return RunSpecifiedBenchmarks(display_reporter, nullptr, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + std::string spec) { + return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { - std::string spec = FLAGS_benchmark_filter; + return RunSpecifiedBenchmarks(display_reporter, file_reporter, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, + std::string spec) { if (spec.empty() || spec == "all") spec = "."; // Regexp that matches all benchmarks @@ -377,7 +480,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (!fname.empty()) { output_file.open(fname); if (!output_file.is_open()) { - Err << "invalid file name: '" << fname << std::endl; + Err << "invalid file name: '" << fname << "'" << std::endl; std::exit(1); } if (!file_reporter) { @@ -399,7 +502,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (FLAGS_benchmark_list_tests) { for (auto const& benchmark : benchmarks) - Out << benchmark.name.str() << "\n"; + Out << benchmark.name().str() << "\n"; } else { internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } @@ -407,10 +510,22 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, return benchmarks.size(); } +std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } + void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } +void AddCustomContext(const std::string& key, const std::string& value) { + if (internal::global_context == nullptr) { + internal::global_context = new std::map(); + } + if (!internal::global_context->emplace(key, value).second) { + std::cerr << "Failed to add custom context \"" << key << "\" as it already " + << "exists with value \"" << value << "\"\n"; + } +} + namespace internal { void PrintUsageAndExit() { @@ -420,6 +535,7 @@ void PrintUsageAndExit() { " [--benchmark_filter=]\n" " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" " [--benchmark_report_aggregates_only={true|false}]\n" " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" @@ -427,6 +543,8 @@ void PrintUsageAndExit() { " [--benchmark_out_format=]\n" " [--benchmark_color={auto|true|false}]\n" " [--benchmark_counters_tabular={true|false}]\n" + " [--benchmark_perf_counters=,...]\n" + " [--benchmark_context==,...]\n" " [--v=]\n"); exit(0); } @@ -443,6 +561,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { &FLAGS_benchmark_min_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || + ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", + &FLAGS_benchmark_enable_random_interleaving) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", @@ -452,11 +572,12 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_out_format", &FLAGS_benchmark_out_format) || ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || - // "color_print" is the deprecated name for "benchmark_color". - // TODO: Remove this. - ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || ParseBoolFlag(argv[i], "benchmark_counters_tabular", &FLAGS_benchmark_counters_tabular) || + ParseStringFlag(argv[i], "benchmark_perf_counters", + &FLAGS_benchmark_perf_counters) || + ParseKeyValueFlag(argv[i], "benchmark_context", + &FLAGS_benchmark_context) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; @@ -467,13 +588,17 @@ void ParseCommandLineFlags(int* argc, char** argv) { } } for (auto const* flag : - {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) + {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { if (*flag != "console" && *flag != "json" && *flag != "csv") { PrintUsageAndExit(); } + } if (FLAGS_benchmark_color.empty()) { PrintUsageAndExit(); } + for (const auto& kv : FLAGS_benchmark_context) { + AddCustomContext(kv.first, kv.second); + } } int InitializeStreams() { @@ -488,6 +613,8 @@ void Initialize(int* argc, char** argv) { internal::LogLevel() = FLAGS_v; } +void Shutdown() { delete internal::global_context; } + bool ReportUnrecognizedArguments(int argc, char** argv) { for (int i = 1; i < argc; ++i) { fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index d468a25..4de36e3 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -1,15 +1,112 @@ #include "benchmark_api_internal.h" +#include + +#include "string_util.h" + namespace benchmark { namespace internal { -State BenchmarkInstance::Run(IterationCount iters, int thread_id, - internal::ThreadTimer* timer, - internal::ThreadManager* manager) const { - State st(iters, arg, thread_id, threads, timer, manager); - benchmark->Run(st); +BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, + int per_family_instance_idx, + const std::vector& args, + int thread_count) + : benchmark_(*benchmark), + family_index_(family_idx), + per_family_instance_index_(per_family_instance_idx), + aggregation_report_mode_(benchmark_.aggregation_report_mode_), + args_(args), + time_unit_(benchmark_.time_unit_), + measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), + use_real_time_(benchmark_.use_real_time_), + use_manual_time_(benchmark_.use_manual_time_), + complexity_(benchmark_.complexity_), + complexity_lambda_(benchmark_.complexity_lambda_), + statistics_(benchmark_.statistics_), + repetitions_(benchmark_.repetitions_), + min_time_(benchmark_.min_time_), + iterations_(benchmark_.iterations_), + threads_(thread_count) { + name_.function_name = benchmark_.name_; + + size_t arg_i = 0; + for (const auto& arg : args) { + if (!name_.args.empty()) { + name_.args += '/'; + } + + if (arg_i < benchmark->arg_names_.size()) { + const auto& arg_name = benchmark_.arg_names_[arg_i]; + if (!arg_name.empty()) { + name_.args += StrFormat("%s:", arg_name.c_str()); + } + } + + name_.args += StrFormat("%" PRId64, arg); + ++arg_i; + } + + if (!IsZero(benchmark->min_time_)) { + name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); + } + + if (benchmark_.iterations_ != 0) { + name_.iterations = StrFormat( + "iterations:%lu", static_cast(benchmark_.iterations_)); + } + + if (benchmark_.repetitions_ != 0) { + name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); + } + + if (benchmark_.measure_process_cpu_time_) { + name_.time_type = "process_time"; + } + + if (benchmark_.use_manual_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "manual_time"; + } else if (benchmark_.use_real_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "real_time"; + } + + if (!benchmark_.thread_counts_.empty()) { + name_.threads = StrFormat("threads:%d", threads_); + } + + setup_ = benchmark_.setup_; + teardown_ = benchmark_.teardown_; +} + +State BenchmarkInstance::Run( + IterationCount iters, int thread_id, internal::ThreadTimer* timer, + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const { + State st(iters, args_, thread_id, threads_, timer, manager, + perf_counters_measurement); + benchmark_.Run(st); return st; } -} // internal -} // benchmark +void BenchmarkInstance::Setup() const { + if (setup_) { + State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, + nullptr); + setup_(st); + } +} + +void BenchmarkInstance::Teardown() const { + if (teardown_) { + State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, + nullptr); + teardown_(st); + } +} +} // namespace internal +} // namespace benchmark diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 264eff9..94c2b29 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -1,9 +1,6 @@ #ifndef BENCHMARK_API_INTERNAL_H #define BENCHMARK_API_INTERNAL_H -#include "benchmark/benchmark.h" -#include "commandlineflags.h" - #include #include #include @@ -11,32 +8,66 @@ #include #include +#include "benchmark/benchmark.h" +#include "commandlineflags.h" + namespace benchmark { namespace internal { // Information kept per benchmark we may want to run -struct BenchmarkInstance { - BenchmarkName name; - Benchmark* benchmark; - AggregationReportMode aggregation_report_mode; - std::vector arg; - TimeUnit time_unit; - int range_multiplier; - bool measure_process_cpu_time; - bool use_real_time; - bool use_manual_time; - BigO complexity; - BigOFunc* complexity_lambda; - UserCounters counters; - const std::vector* statistics; - bool last_benchmark_instance; - int repetitions; - double min_time; - IterationCount iterations; - int threads; // Number of concurrent threads to us +class BenchmarkInstance { + public: + BenchmarkInstance(Benchmark* benchmark, int family_index, + int per_family_instance_index, + const std::vector& args, int threads); + + const BenchmarkName& name() const { return name_; } + int family_index() const { return family_index_; } + int per_family_instance_index() const { return per_family_instance_index_; } + AggregationReportMode aggregation_report_mode() const { + return aggregation_report_mode_; + } + TimeUnit time_unit() const { return time_unit_; } + bool measure_process_cpu_time() const { return measure_process_cpu_time_; } + bool use_real_time() const { return use_real_time_; } + bool use_manual_time() const { return use_manual_time_; } + BigO complexity() const { return complexity_; } + BigOFunc* complexity_lambda() const { return complexity_lambda_; } + const std::vector& statistics() const { return statistics_; } + int repetitions() const { return repetitions_; } + double min_time() const { return min_time_; } + IterationCount iterations() const { return iterations_; } + int threads() const { return threads_; } + void Setup() const; + void Teardown() const; State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, - internal::ThreadManager* manager) const; + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const; + + private: + BenchmarkName name_; + Benchmark& benchmark_; + const int family_index_; + const int per_family_instance_index_; + AggregationReportMode aggregation_report_mode_; + const std::vector& args_; + TimeUnit time_unit_; + bool measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + UserCounters counters_; + const std::vector& statistics_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; // Number of concurrent threads to us + + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_ = nullptr; + callback_function teardown_ = nullptr; }; bool FindBenchmarksInternal(const std::string& re, diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 65d9944..61a0c26 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -35,11 +36,6 @@ #include #include -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif -#include - #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" #include "check.h" @@ -115,7 +111,7 @@ void BenchmarkFamilies::ClearBenchmarks() { bool BenchmarkFamilies::FindBenchmarks( std::string spec, std::vector* benchmarks, std::ostream* ErrStream) { - CHECK(ErrStream); + BM_CHECK(ErrStream); auto& Err = *ErrStream; // Make regular expression out of command-line flag std::string error_msg; @@ -133,8 +129,13 @@ bool BenchmarkFamilies::FindBenchmarks( // Special list of thread counts to use when none are specified const std::vector one_thread = {1}; + int next_family_index = 0; + MutexLock l(mutex_); for (std::unique_ptr& family : families_) { + int family_index = next_family_index; + int per_family_instance_index = 0; + // Family was deleted or benchmark doesn't match if (!family) continue; @@ -154,84 +155,24 @@ bool BenchmarkFamilies::FindBenchmarks( } // reserve in the special case the regex ".", since we know the final // family size. - if (spec == ".") benchmarks->reserve(family_size); + if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); for (auto const& args : family->args_) { for (int num_threads : *thread_counts) { - BenchmarkInstance instance; - instance.name.function_name = family->name_; - instance.benchmark = family.get(); - instance.aggregation_report_mode = family->aggregation_report_mode_; - instance.arg = args; - instance.time_unit = family->time_unit_; - instance.range_multiplier = family->range_multiplier_; - instance.min_time = family->min_time_; - instance.iterations = family->iterations_; - instance.repetitions = family->repetitions_; - instance.measure_process_cpu_time = family->measure_process_cpu_time_; - instance.use_real_time = family->use_real_time_; - instance.use_manual_time = family->use_manual_time_; - instance.complexity = family->complexity_; - instance.complexity_lambda = family->complexity_lambda_; - instance.statistics = &family->statistics_; - instance.threads = num_threads; + BenchmarkInstance instance(family.get(), family_index, + per_family_instance_index, args, + num_threads); - // Add arguments to instance name - size_t arg_i = 0; - for (auto const& arg : args) { - if (!instance.name.args.empty()) { - instance.name.args += '/'; - } - - if (arg_i < family->arg_names_.size()) { - const auto& arg_name = family->arg_names_[arg_i]; - if (!arg_name.empty()) { - instance.name.args += StrFormat("%s:", arg_name.c_str()); - } - } - - instance.name.args += StrFormat("%" PRId64, arg); - ++arg_i; - } - - if (!IsZero(family->min_time_)) - instance.name.min_time = - StrFormat("min_time:%0.3f", family->min_time_); - if (family->iterations_ != 0) { - instance.name.iterations = - StrFormat("iterations:%lu", - static_cast(family->iterations_)); - } - if (family->repetitions_ != 0) - instance.name.repetitions = - StrFormat("repeats:%d", family->repetitions_); - - if (family->measure_process_cpu_time_) { - instance.name.time_type = "process_time"; - } - - if (family->use_manual_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "manual_time"; - } else if (family->use_real_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "real_time"; - } - - // Add the number of threads used to the name - if (!family->thread_counts_.empty()) { - instance.name.threads = StrFormat("threads:%d", instance.threads); - } - - const auto full_name = instance.name.str(); + const auto full_name = instance.name().str(); if ((re.Match(full_name) && !isNegativeFilter) || (!re.Match(full_name) && isNegativeFilter)) { - instance.last_benchmark_instance = (&args == &family->args_.back()); benchmarks->push_back(std::move(instance)); + + ++per_family_instance_index; + + // Only bump the next family index once we've estabilished that + // at least one instance of this family will be run. + if (next_family_index == family_index) ++next_family_index; } } } @@ -270,16 +211,24 @@ Benchmark::Benchmark(const char* name) use_real_time_(false), use_manual_time_(false), complexity_(oNone), - complexity_lambda_(nullptr) { + complexity_lambda_(nullptr), + setup_(nullptr), + teardown_(nullptr) { ComputeStatistics("mean", StatisticsMean); ComputeStatistics("median", StatisticsMedian); ComputeStatistics("stddev", StatisticsStdDev); + ComputeStatistics("cv", StatisticsCV, kPercentage); } Benchmark::~Benchmark() {} +Benchmark* Benchmark::Name(const std::string& name) { + SetName(name.c_str()); + return this; +} + Benchmark* Benchmark::Arg(int64_t x) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); args_.push_back({x}); return this; } @@ -290,7 +239,7 @@ Benchmark* Benchmark::Unit(TimeUnit unit) { } Benchmark* Benchmark::Range(int64_t start, int64_t limit) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); std::vector arglist; AddRange(&arglist, start, limit, range_multiplier_); @@ -302,7 +251,7 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) { Benchmark* Benchmark::Ranges( const std::vector>& ranges) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); std::vector> arglists(ranges.size()); for (std::size_t i = 0; i < ranges.size(); i++) { AddRange(&arglists[i], ranges[i].first, ranges[i].second, @@ -316,7 +265,7 @@ Benchmark* Benchmark::Ranges( Benchmark* Benchmark::ArgsProduct( const std::vector>& arglists) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); std::vector indices(arglists.size()); const std::size_t total = std::accumulate( @@ -343,20 +292,20 @@ Benchmark* Benchmark::ArgsProduct( } Benchmark* Benchmark::ArgName(const std::string& name) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); arg_names_ = {name}; return this; } Benchmark* Benchmark::ArgNames(const std::vector& names) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); arg_names_ = names; return this; } Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - CHECK_LE(start, limit); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK_LE(start, limit); for (int64_t arg = start; arg <= limit; arg += step) { args_.push_back({arg}); } @@ -364,7 +313,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { } Benchmark* Benchmark::Args(const std::vector& args) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); args_.push_back(args); return this; } @@ -374,28 +323,40 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { return this; } +Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) { + BM_CHECK(setup != nullptr); + setup_ = setup; + return this; +} + +Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) { + BM_CHECK(teardown != nullptr); + teardown_ = teardown; + return this; +} + Benchmark* Benchmark::RangeMultiplier(int multiplier) { - CHECK(multiplier > 1); + BM_CHECK(multiplier > 1); range_multiplier_ = multiplier; return this; } Benchmark* Benchmark::MinTime(double t) { - CHECK(t > 0.0); - CHECK(iterations_ == 0); + BM_CHECK(t > 0.0); + BM_CHECK(iterations_ == 0); min_time_ = t; return this; } Benchmark* Benchmark::Iterations(IterationCount n) { - CHECK(n > 0); - CHECK(IsZero(min_time_)); + BM_CHECK(n > 0); + BM_CHECK(IsZero(min_time_)); iterations_ = n; return this; } Benchmark* Benchmark::Repetitions(int n) { - CHECK(n > 0); + BM_CHECK(n > 0); repetitions_ = n; return this; } @@ -428,14 +389,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() { } Benchmark* Benchmark::UseRealTime() { - CHECK(!use_manual_time_) + BM_CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_real_time_ = true; return this; } Benchmark* Benchmark::UseManualTime() { - CHECK(!use_real_time_) + BM_CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_manual_time_ = true; return this; @@ -452,21 +413,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) { return this; } -Benchmark* Benchmark::ComputeStatistics(std::string name, - StatisticsFunc* statistics) { - statistics_.emplace_back(name, statistics); +Benchmark* Benchmark::ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit) { + statistics_.emplace_back(name, statistics, unit); return this; } Benchmark* Benchmark::Threads(int t) { - CHECK_GT(t, 0); + BM_CHECK_GT(t, 0); thread_counts_.push_back(t); return this; } Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); AddRange(&thread_counts_, min_threads, max_threads, 2); return this; @@ -474,9 +436,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, int stride) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - CHECK_GE(stride, 1); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); + BM_CHECK_GE(stride, 1); for (auto i = min_threads; i < max_threads; i += stride) { thread_counts_.push_back(i); @@ -512,4 +474,19 @@ void ClearRegisteredBenchmarks() { internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); } +std::vector CreateRange(int64_t lo, int64_t hi, int multi) { + std::vector args; + internal::AddRange(&args, lo, hi, multi); + return args; +} + +std::vector CreateDenseRange(int64_t start, int64_t limit, int step) { + BM_CHECK_LE(start, limit); + std::vector args; + for (int64_t arg = start; arg <= limit; arg += step) { + args.push_back(arg); + } + return args; +} + } // end namespace benchmark diff --git a/src/benchmark_register.h b/src/benchmark_register.h index 61377d7..d3f4974 100644 --- a/src/benchmark_register.h +++ b/src/benchmark_register.h @@ -1,6 +1,7 @@ #ifndef BENCHMARK_REGISTER_H #define BENCHMARK_REGISTER_H +#include #include #include "check.h" @@ -11,18 +12,18 @@ namespace internal { // Append the powers of 'mult' in the closed interval [lo, hi]. // Returns iterator to the start of the inserted range. template -typename std::vector::iterator -AddPowers(std::vector* dst, T lo, T hi, int mult) { - CHECK_GE(lo, 0); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); +typename std::vector::iterator AddPowers(std::vector* dst, T lo, T hi, + int mult) { + BM_CHECK_GE(lo, 0); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); const size_t start_offset = dst->size(); static const T kmax = std::numeric_limits::max(); // Space out the values in multiples of "mult" - for (T i = 1; i <= hi; i *= mult) { + for (T i = static_cast(1); i <= hi; i *= mult) { if (i >= lo) { dst->push_back(i); } @@ -37,10 +38,10 @@ AddPowers(std::vector* dst, T lo, T hi, int mult) { template void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { // We negate lo and hi so we require that they cannot be equal to 'min'. - CHECK_GT(lo, std::numeric_limits::min()); - CHECK_GT(hi, std::numeric_limits::min()); - CHECK_GE(hi, lo); - CHECK_LE(hi, 0); + BM_CHECK_GT(lo, std::numeric_limits::min()); + BM_CHECK_GT(hi, std::numeric_limits::min()); + BM_CHECK_GE(hi, lo); + BM_CHECK_LE(hi, 0); // Add positive powers, then negate and reverse. // Casts necessary since small integers get promoted @@ -59,8 +60,8 @@ void AddRange(std::vector* dst, T lo, T hi, int mult) { static_assert(std::is_integral::value && std::is_signed::value, "Args type must be a signed integer"); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); // Add "lo" dst->push_back(lo); @@ -86,7 +87,7 @@ void AddRange(std::vector* dst, T lo, T hi, int mult) { } // Treat 0 as a special case (see discussion on #762). - if (lo <= 0 && hi >= 0) { + if (lo < 0 && hi >= 0) { dst->push_back(0); } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7bc6b63..eac807b 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "benchmark_runner.h" + #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" #include "internal_macros.h" @@ -45,6 +46,7 @@ #include "internal_macros.h" #include "log.h" #include "mutex.h" +#include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" @@ -65,59 +67,64 @@ BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, const internal::ThreadManager::Result& results, IterationCount memory_iterations, - const MemoryManager::Result& memory_result, double seconds, - int64_t repetition_index) { + const MemoryManager::Result* memory_result, double seconds, + int64_t repetition_index, int64_t repeats) { // Create report about this benchmark run. BenchmarkReporter::Run report; - report.run_name = b.name; + report.run_name = b.name(); + report.family_index = b.family_index(); + report.per_family_instance_index = b.per_family_instance_index(); report.error_occurred = results.has_error_; report.error_message = results.error_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. report.iterations = results.iterations; - report.time_unit = b.time_unit; - report.threads = b.threads; + report.time_unit = b.time_unit(); + report.threads = b.threads(); report.repetition_index = repetition_index; - report.repetitions = b.repetitions; + report.repetitions = repeats; if (!report.error_occurred) { - if (b.use_manual_time) { + if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { report.real_accumulated_time = results.real_time_used; } report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - report.statistics = b.statistics; + report.complexity = b.complexity(); + report.complexity_lambda = b.complexity_lambda(); + report.statistics = &b.statistics(); report.counters = results.counters; if (memory_iterations > 0) { - report.has_memory_result = true; + assert(memory_result != nullptr); + report.memory_result = memory_result; report.allocs_per_iter = - memory_iterations ? static_cast(memory_result.num_allocs) / + memory_iterations ? static_cast(memory_result->num_allocs) / memory_iterations : 0; - report.max_bytes_used = memory_result.max_bytes_used; } - internal::Finish(&report.counters, results.iterations, seconds, b.threads); + internal::Finish(&report.counters, results.iterations, seconds, + b.threads()); } return report; } // Execute one thread of benchmark b for the specified number of iterations. -// Adds the stats collected for the thread into *total. +// Adds the stats collected for the thread into manager->results. void RunInThread(const BenchmarkInstance* b, IterationCount iters, - int thread_id, ThreadManager* manager) { + int thread_id, ThreadManager* manager, + PerfCountersMeasurement* perf_counters_measurement) { internal::ThreadTimer timer( - b->measure_process_cpu_time + b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); - State st = b->Run(iters, thread_id, &timer, manager); - CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + State st = + b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); + BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -132,229 +139,219 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, manager->NotifyThreadComplete(); } -class BenchmarkRunner { - public: - BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - std::vector* complexity_reports_) - : b(b_), - complexity_reports(*complexity_reports_), - min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), - repeats(b.repetitions != 0 ? b.repetitions - : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations != 0), - pool(b.threads - 1), - iters(has_explicit_iteration_count ? b.iterations : 1) { - run_results.display_report_aggregates_only = - (FLAGS_benchmark_report_aggregates_only || - FLAGS_benchmark_display_aggregates_only); - run_results.file_report_aggregates_only = - FLAGS_benchmark_report_aggregates_only; - if (b.aggregation_report_mode != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = - (b.aggregation_report_mode & - internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); - } - - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { - DoOneRepetition(repetition_num); - } - - // Calculate additional statistics - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); - - // Maybe calculate complexity report - if ((b.complexity != oNone) && b.last_benchmark_instance) { - auto additional_run_stats = ComputeBigO(complexity_reports); - run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); - complexity_reports.clear(); - } - } - - RunResults&& get_results() { return std::move(run_results); } - - private: - RunResults run_results; - - const benchmark::internal::BenchmarkInstance& b; - std::vector& complexity_reports; - - const double min_time; - const int repeats; - const bool has_explicit_iteration_count; - - std::vector pool; - - IterationCount iters; // preserved between repetitions! - // So only the first repetition has to find/calculate it, - // the other repetitions will just use that precomputed iteration count. - - struct IterationResults { - internal::ThreadManager::Result results; - IterationCount iters; - double seconds; - }; - IterationResults DoNIterations() { - VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; - - std::unique_ptr manager; - manager.reset(new internal::ThreadManager(b.threads)); - - // Run all but one thread in separate threads - for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), - manager.get()); - } - // And run one thread here directly. - // (If we were asked to run just one thread, we don't create new threads.) - // Yes, we need to do this here *after* we start the separate threads. - RunInThread(&b, iters, 0, manager.get()); - - // The main thread has finished. Now let's wait for the other threads. - manager->WaitForAllThreads(); - for (std::thread& thread : pool) thread.join(); - - IterationResults i; - // Acquire the measurements/counters from the manager, UNDER THE LOCK! - { - MutexLock l(manager->GetBenchmarkMutex()); - i.results = manager->results; - } - - // And get rid of the manager. - manager.reset(); - - // Adjust real/manual time stats since they were reported per thread. - i.results.real_time_used /= b.threads; - i.results.manual_time_used /= b.threads; - // If we were measuring whole-process CPU usage, adjust the CPU time too. - if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; - - VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" - << i.results.real_time_used << "\n"; - - // So for how long were we running? - i.iters = iters; - // Base decisions off of real time if requested by this benchmark. - i.seconds = i.results.cpu_time_used; - if (b.use_manual_time) { - i.seconds = i.results.manual_time_used; - } else if (b.use_real_time) { - i.seconds = i.results.real_time_used; - } - - return i; - } - - IterationCount PredictNumItersNeeded(const IterationResults& i) const { - // See how much iterations should be increased by. - // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); - // If our last run was at least 10% of FLAGS_benchmark_min_time then we - // use the multiplier directly. - // Otherwise we use at most 10 times expansion. - // NOTE: When the last run was at least 10% of the min time the max - // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); - if (multiplier <= 1.0) multiplier = 2.0; - - // So what seems to be the sufficiently-large iteration count? Round up. - const IterationCount max_next_iters = static_cast( - std::lround(std::max(multiplier * static_cast(i.iters), - static_cast(i.iters) + 1.0))); - // But we do have *some* sanity limits though.. - const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); - - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; - return next_iters; // round up before conversion to integer. - } - - bool ShouldReportIterationResults(const IterationResults& i) const { - // Determine if this run should be reported; - // Either it has run for a sufficient amount of time - // or because an error was reported. - return i.results.has_error_ || - i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); - } - - void DoOneRepetition(int64_t repetition_index) { - const bool is_the_first_repetition = repetition_index == 0; - IterationResults i; - - // We *may* be gradually increasing the length (iteration count) - // of the benchmark until we decide the results are significant. - // And once we do, we report those last results and exit. - // Please do note that the if there are repetitions, the iteration count - // is *only* calculated for the *first* repetition, and other repetitions - // simply use that precomputed iteration count. - for (;;) { - i = DoNIterations(); - - // Do we consider the results to be significant? - // If we are doing repetitions, and the first repetition was already done, - // it has calculated the correct iteration time, so we have run that very - // iteration count just now. No need to calculate anything. Just report. - // Else, the normal rules apply. - const bool results_are_significant = !is_the_first_repetition || - has_explicit_iteration_count || - ShouldReportIterationResults(i); - - if (results_are_significant) break; // Good, let's report them! - - // Nope, bad iteration. Let's re-estimate the hopefully-sufficient - // iteration count, and run the benchmark again... - - iters = PredictNumItersNeeded(i); - assert(iters > i.iters && - "if we did more iterations than we want to do the next time, " - "then we should have accepted the current iteration run."); - } - - // Oh, one last thing, we need to also produce the 'memory measurements'.. - MemoryManager::Result memory_result; - IterationCount memory_iterations = 0; - if (memory_manager != nullptr) { - // Only run a few iterations to reduce the impact of one-time - // allocations in benchmarks that are not properly managed. - memory_iterations = std::min(16, iters); - memory_manager->Start(); - std::unique_ptr manager; - manager.reset(new internal::ThreadManager(1)); - RunInThread(&b, memory_iterations, 0, manager.get()); - manager->WaitForAllThreads(); - manager.reset(); - - memory_manager->Stop(&memory_result); - } - - // Ok, now actualy report. - BenchmarkReporter::Run report = - CreateRunReport(b, i.results, memory_iterations, memory_result, - i.seconds, repetition_index); - - if (!report.error_occurred && b.complexity != oNone) - complexity_reports.push_back(report); - - run_results.non_aggregates.push_back(report); - } -}; - } // end namespace -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports) { - internal::BenchmarkRunner r(b, complexity_reports); - return r.get_results(); +BenchmarkRunner::BenchmarkRunner( + const benchmark::internal::BenchmarkInstance& b_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family_) + : b(b_), + reports_for_family(reports_for_family_), + min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + repeats(b.repetitions() != 0 ? b.repetitions() + : FLAGS_benchmark_repetitions), + has_explicit_iteration_count(b.iterations() != 0), + pool(b.threads() - 1), + iters(has_explicit_iteration_count ? b.iterations() : 1), + perf_counters_measurement( + PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), + perf_counters_measurement_ptr(perf_counters_measurement.IsValid() + ? &perf_counters_measurement + : nullptr) { + run_results.display_report_aggregates_only = + (FLAGS_benchmark_report_aggregates_only || + FLAGS_benchmark_display_aggregates_only); + run_results.file_report_aggregates_only = + FLAGS_benchmark_report_aggregates_only; + if (b.aggregation_report_mode() != internal::ARM_Unspecified) { + run_results.display_report_aggregates_only = + (b.aggregation_report_mode() & + internal::ARM_DisplayReportAggregatesOnly); + run_results.file_report_aggregates_only = + (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); + BM_CHECK(FLAGS_benchmark_perf_counters.empty() || + perf_counters_measurement.IsValid()) + << "Perf counters were requested but could not be set up."; + } +} + +BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { + BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; + + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(b.threads())); + + // Run all but one thread in separate threads + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), + manager.get(), perf_counters_measurement_ptr); + } + // And run one thread here directly. + // (If we were asked to run just one thread, we don't create new threads.) + // Yes, we need to do this here *after* we start the separate threads. + RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); + + // The main thread has finished. Now let's wait for the other threads. + manager->WaitForAllThreads(); + for (std::thread& thread : pool) thread.join(); + + IterationResults i; + // Acquire the measurements/counters from the manager, UNDER THE LOCK! + { + MutexLock l(manager->GetBenchmarkMutex()); + i.results = manager->results; + } + + // And get rid of the manager. + manager.reset(); + + // Adjust real/manual time stats since they were reported per thread. + i.results.real_time_used /= b.threads(); + i.results.manual_time_used /= b.threads(); + // If we were measuring whole-process CPU usage, adjust the CPU time too. + if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); + + BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" + << i.results.real_time_used << "\n"; + + // By using KeepRunningBatch a benchmark can iterate more times than + // requested, so take the iteration count from i.results. + i.iters = i.results.iterations / b.threads(); + + // Base decisions off of real time if requested by this benchmark. + i.seconds = i.results.cpu_time_used; + if (b.use_manual_time()) { + i.seconds = i.results.manual_time_used; + } else if (b.use_real_time()) { + i.seconds = i.results.real_time_used; + } + + return i; +} + +IterationCount BenchmarkRunner::PredictNumItersNeeded( + const IterationResults& i) const { + // See how much iterations should be increased by. + // Note: Avoid division by zero with max(seconds, 1ns). + double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + // If our last run was at least 10% of FLAGS_benchmark_min_time then we + // use the multiplier directly. + // Otherwise we use at most 10 times expansion. + // NOTE: When the last run was at least 10% of the min time the max + // expansion should be 14x. + bool is_significant = (i.seconds / min_time) > 0.1; + multiplier = is_significant ? multiplier : 10.0; + + // So what seems to be the sufficiently-large iteration count? Round up. + const IterationCount max_next_iters = static_cast( + std::lround(std::max(multiplier * static_cast(i.iters), + static_cast(i.iters) + 1.0))); + // But we do have *some* sanity limits though.. + const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); + + BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + return next_iters; // round up before conversion to integer. +} + +bool BenchmarkRunner::ShouldReportIterationResults( + const IterationResults& i) const { + // Determine if this run should be reported; + // Either it has run for a sufficient amount of time + // or because an error was reported. + return i.results.has_error_ || + i.iters >= kMaxIterations || // Too many iterations already. + i.seconds >= min_time || // The elapsed time is large enough. + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are except from this sanity check. + ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); +} + +void BenchmarkRunner::DoOneRepetition() { + assert(HasRepeatsRemaining() && "Already done all repetitions?"); + + const bool is_the_first_repetition = num_repetitions_done == 0; + IterationResults i; + + // We *may* be gradually increasing the length (iteration count) + // of the benchmark until we decide the results are significant. + // And once we do, we report those last results and exit. + // Please do note that the if there are repetitions, the iteration count + // is *only* calculated for the *first* repetition, and other repetitions + // simply use that precomputed iteration count. + for (;;) { + b.Setup(); + i = DoNIterations(); + b.Teardown(); + + // Do we consider the results to be significant? + // If we are doing repetitions, and the first repetition was already done, + // it has calculated the correct iteration time, so we have run that very + // iteration count just now. No need to calculate anything. Just report. + // Else, the normal rules apply. + const bool results_are_significant = !is_the_first_repetition || + has_explicit_iteration_count || + ShouldReportIterationResults(i); + + if (results_are_significant) break; // Good, let's report them! + + // Nope, bad iteration. Let's re-estimate the hopefully-sufficient + // iteration count, and run the benchmark again... + + iters = PredictNumItersNeeded(i); + assert(iters > i.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); + } + + // Oh, one last thing, we need to also produce the 'memory measurements'.. + MemoryManager::Result* memory_result = nullptr; + IterationCount memory_iterations = 0; + if (memory_manager != nullptr) { + // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an + // optional so we don't have to own the Result here. + // Can't do it now due to cxx03. + memory_results.push_back(MemoryManager::Result()); + memory_result = &memory_results.back(); + // Only run a few iterations to reduce the impact of one-time + // allocations in benchmarks that are not properly managed. + memory_iterations = std::min(16, iters); + memory_manager->Start(); + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); + manager->WaitForAllThreads(); + manager.reset(); + b.Teardown(); + + BENCHMARK_DISABLE_DEPRECATED_WARNING + memory_manager->Stop(memory_result); + BENCHMARK_RESTORE_DEPRECATED_WARNING + } + + // Ok, now actually report. + BenchmarkReporter::Run report = + CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, + num_repetitions_done, repeats); + + if (reports_for_family) { + ++reports_for_family->num_runs_done; + if (!report.error_occurred) reports_for_family->Runs.push_back(report); + } + + run_results.non_aggregates.push_back(report); + + ++num_repetitions_done; +} + +RunResults&& BenchmarkRunner::GetResults() { + assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?"); + + // Calculate additional statistics over the repetitions of this instance. + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + + return std::move(run_results); } } // end namespace internal diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 96e8282..752eefd 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -15,19 +15,22 @@ #ifndef BENCHMARK_RUNNER_H_ #define BENCHMARK_RUNNER_H_ +#include +#include + #include "benchmark_api_internal.h" #include "internal_macros.h" - -DECLARE_double(benchmark_min_time); - -DECLARE_int32(benchmark_repetitions); - -DECLARE_bool(benchmark_report_aggregates_only); - -DECLARE_bool(benchmark_display_aggregates_only); +#include "perf_counters.h" +#include "thread_manager.h" namespace benchmark { +BM_DECLARE_double(benchmark_min_time); +BM_DECLARE_int32(benchmark_repetitions); +BM_DECLARE_bool(benchmark_report_aggregates_only); +BM_DECLARE_bool(benchmark_display_aggregates_only); +BM_DECLARE_string(benchmark_perf_counters); + namespace internal { extern MemoryManager* memory_manager; @@ -40,9 +43,59 @@ struct RunResults { bool file_report_aggregates_only = false; }; -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports); +class BenchmarkRunner { + public: + BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family); + + int GetNumRepeats() const { return repeats; } + + bool HasRepeatsRemaining() const { + return GetNumRepeats() != num_repetitions_done; + } + + void DoOneRepetition(); + + RunResults&& GetResults(); + + BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { + return reports_for_family; + } + + private: + RunResults run_results; + + const benchmark::internal::BenchmarkInstance& b; + BenchmarkReporter::PerFamilyRunReports* reports_for_family; + + const double min_time; + const int repeats; + const bool has_explicit_iteration_count; + + int num_repetitions_done = 0; + + std::vector pool; + + std::vector memory_results; + + IterationCount iters; // preserved between repetitions! + // So only the first repetition has to find/calculate it, + // the other repetitions will just use that precomputed iteration count. + + PerfCountersMeasurement perf_counters_measurement; + PerfCountersMeasurement* const perf_counters_measurement_ptr; + + struct IterationResults { + internal::ThreadManager::Result results; + IterationCount iters; + double seconds; + }; + IterationResults DoNIterations(); + + IterationCount PredictNumItersNeeded(const IterationResults& i) const; + + bool ShouldReportIterationResults(const IterationResults& i) const; +}; } // namespace internal diff --git a/src/check.h b/src/check.h index f5f8253..90c7bbf 100644 --- a/src/check.h +++ b/src/check.h @@ -23,8 +23,9 @@ BENCHMARK_NORETURN inline void CallAbortHandler() { std::abort(); // fallback to enforce noreturn } -// CheckHandler is the class constructed by failing CHECK macros. CheckHandler -// will log information about the failures and abort when it is destructed. +// CheckHandler is the class constructed by failing BM_CHECK macros. +// CheckHandler will log information about the failures and abort when it is +// destructed. class CheckHandler { public: CheckHandler(const char* check, const char* file, const char* func, int line) @@ -35,10 +36,17 @@ class CheckHandler { LogType& GetLog() { return log_; } +#if defined(COMPILER_MSVC) +#pragma warning(push) +#pragma warning(disable : 4722) +#endif BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; CallAbortHandler(); } +#if defined(COMPILER_MSVC) +#pragma warning(pop) +#endif CheckHandler& operator=(const CheckHandler&) = delete; CheckHandler(const CheckHandler&) = delete; @@ -51,32 +59,32 @@ class CheckHandler { } // end namespace internal } // end namespace benchmark -// The CHECK macro returns a std::ostream object that can have extra information -// written to it. +// The BM_CHECK macro returns a std::ostream object that can have extra +// information written to it. #ifndef NDEBUG -#define CHECK(b) \ +#define BM_CHECK(b) \ (b ? ::benchmark::internal::GetNullLogInstance() \ : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ .GetLog()) #else -#define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance() #endif // clang-format off // preserve whitespacing between operators for alignment -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_NE(a, b) CHECK((a) != (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) -#define CHECK_LE(a, b) CHECK((a) <= (b)) -#define CHECK_GT(a, b) CHECK((a) > (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) +#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b)) +#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b)) +#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b)) +#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b)) +#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b)) +#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b)) -#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) -#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) -#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) -#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) -#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) -#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) +#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps)) +#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps)) +#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps)) +#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps)) +#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps)) +#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps)) //clang-format on #endif // CHECK_H_ diff --git a/src/colorprint.cc b/src/colorprint.cc index fff6a98..1a000a0 100644 --- a/src/colorprint.cc +++ b/src/colorprint.cc @@ -25,8 +25,8 @@ #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS -#include #include +#include #else #include #endif // BENCHMARK_OS_WINDOWS @@ -94,7 +94,7 @@ std::string FormatString(const char* msg, va_list args) { va_end(args_cp); // currently there is no error handling for failure, so this is hack. - CHECK(ret >= 0); + BM_CHECK(ret >= 0); if (ret == 0) // handle empty expansion return {}; @@ -102,10 +102,10 @@ std::string FormatString(const char* msg, va_list args) { return local_buff; else { // we did not provide a long enough buffer on our first attempt. - size = (size_t)ret + 1; // + 1 for the null byte + size = static_cast(ret) + 1; // + 1 for the null byte std::unique_ptr buff(new char[size]); ret = vsnprintf(buff.get(), size, msg, args); - CHECK(ret > 0 && ((size_t)ret) < size); + BM_CHECK(ret > 0 && (static_cast(ret)) < size); return buff.get(); } } diff --git a/src/commandlineflags.cc b/src/commandlineflags.cc index 0648fe3..9615e35 100644 --- a/src/commandlineflags.cc +++ b/src/commandlineflags.cc @@ -20,6 +20,10 @@ #include #include #include +#include +#include + +#include "../src/string_util.h" namespace benchmark { namespace { @@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) { return true; } +// Parses 'str' into KV pairs. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. +bool ParseKvPairs(const std::string& src_text, const char* str, + std::map* value) { + std::map kvs; + for (const auto& kvpair : StrSplit(str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) { + std::cerr << src_text << " is expected to be a comma-separated list of " + << "= strings, but actually has value \"" << str + << "\".\n"; + return false; + } + if (!kvs.emplace(kv[0], kv[1]).second) { + std::cerr << src_text << " is expected to contain unique keys but key \"" + << kv[0] << "\" was repeated.\n"; + return false; + } + } + + *value = kvs; + return true; +} + // Returns the name of the environment variable corresponding to the // given flag. For example, FlagToEnvVar("foo") will return // "BENCHMARK_FOO" in the open-source version. @@ -129,6 +157,20 @@ const char* StringFromEnv(const char* flag, const char* default_val) { return value == nullptr ? default_val : value; } +std::map KvPairsFromEnv( + const char* flag, std::map default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + + if (value_str == nullptr) return default_val; + + std::map value; + if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) { + return default_val; + } + return value; +} + // Parses a string as a command line flag. The string should have // the format "--flag=value". When def_optional is true, the "=value" // part can be omitted. @@ -206,6 +248,21 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) { return true; } +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map* value) { + const char* const value_str = ParseFlagValue(str, flag, false); + + if (value_str == nullptr) return false; + + for (const auto& kvpair : StrSplit(value_str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) return false; + value->emplace(kv[0], kv[1]); + } + + return true; +} + bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } diff --git a/src/commandlineflags.h b/src/commandlineflags.h index 3a1f6a8..5baaf11 100644 --- a/src/commandlineflags.h +++ b/src/commandlineflags.h @@ -2,61 +2,70 @@ #define BENCHMARK_COMMANDLINEFLAGS_H_ #include +#include #include // Macro for referencing flags. #define FLAG(name) FLAGS_##name // Macros for declaring flags. -#define DECLARE_bool(name) extern bool FLAG(name) -#define DECLARE_int32(name) extern int32_t FLAG(name) -#define DECLARE_double(name) extern double FLAG(name) -#define DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_bool(name) extern bool FLAG(name) +#define BM_DECLARE_int32(name) extern int32_t FLAG(name) +#define BM_DECLARE_double(name) extern double FLAG(name) +#define BM_DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_kvpairs(name) \ + extern std::map FLAG(name) // Macros for defining flags. -#define DEFINE_bool(name, default_val) \ - bool FLAG(name) = \ - benchmark::BoolFromEnv(#name, default_val) -#define DEFINE_int32(name, default_val) \ - int32_t FLAG(name) = \ - benchmark::Int32FromEnv(#name, default_val) -#define DEFINE_double(name, default_val) \ - double FLAG(name) = \ - benchmark::DoubleFromEnv(#name, default_val) -#define DEFINE_string(name, default_val) \ - std::string FLAG(name) = \ - benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_bool(name, default_val) \ + bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) +#define BM_DEFINE_int32(name, default_val) \ + int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) +#define BM_DEFINE_double(name, default_val) \ + double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) +#define BM_DEFINE_string(name, default_val) \ + std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_kvpairs(name, default_val) \ + std::map FLAG(name) = \ + benchmark::KvPairsFromEnv(#name, default_val) namespace benchmark { -// Parses a bool from the environment variable -// corresponding to the given flag. +// Parses a bool from the environment variable corresponding to the given flag. // // If the variable exists, returns IsTruthyFlagValue() value; if not, // returns the given default value. bool BoolFromEnv(const char* flag, bool default_val); -// Parses an Int32 from the environment variable -// corresponding to the given flag. +// Parses an Int32 from the environment variable corresponding to the given +// flag. // // If the variable exists, returns ParseInt32() value; if not, returns // the given default value. int32_t Int32FromEnv(const char* flag, int32_t default_val); -// Parses an Double from the environment variable -// corresponding to the given flag. +// Parses an Double from the environment variable corresponding to the given +// flag. // // If the variable exists, returns ParseDouble(); if not, returns // the given default value. double DoubleFromEnv(const char* flag, double default_val); -// Parses a string from the environment variable -// corresponding to the given flag. +// Parses a string from the environment variable corresponding to the given +// flag. // // If variable exists, returns its value; if not, returns // the given default value. const char* StringFromEnv(const char* flag, const char* default_val); +// Parses a set of kvpairs from the environment variable corresponding to the +// given flag. +// +// If variable exists, returns its value; if not, returns +// the given default value. +std::map KvPairsFromEnv( + const char* flag, std::map default_val); + // Parses a string for a bool flag, in the form of either // "--flag=value" or "--flag". // @@ -68,27 +77,31 @@ const char* StringFromEnv(const char* flag, const char* default_val); // true. On failure, returns false without changing *value. bool ParseBoolFlag(const char* str, const char* flag, bool* value); -// Parses a string for an Int32 flag, in the form of -// "--flag=value". +// Parses a string for an Int32 flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); -// Parses a string for a Double flag, in the form of -// "--flag=value". +// Parses a string for a Double flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. bool ParseDoubleFlag(const char* str, const char* flag, double* value); -// Parses a string for a string flag, in the form of -// "--flag=value". +// Parses a string for a string flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. bool ParseStringFlag(const char* str, const char* flag, std::string* value); +// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" +// +// On success, stores the value of the flag in *value and returns true. On +// failure returns false, though *value may have been mutated. +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map* value); + // Returns true if the string matches the flag. bool IsFlag(const char* str, const char* flag); diff --git a/src/complexity.cc b/src/complexity.cc index aeed67f..825c573 100644 --- a/src/complexity.cc +++ b/src/complexity.cc @@ -15,12 +15,13 @@ // Source project : https://github.com/ismaelJimenez/cpp.leastsq // Adapted to be used with google benchmark -#include "benchmark/benchmark.h" +#include "complexity.h" #include #include + +#include "benchmark/benchmark.h" #include "check.h" -#include "complexity.h" namespace benchmark { @@ -82,7 +83,6 @@ std::string GetBigOString(BigO complexity) { LeastSq MinimalLeastSq(const std::vector& n, const std::vector& time, BigOFunc* fitting_curve) { - double sigma_gn = 0.0; double sigma_gn_squared = 0.0; double sigma_time = 0.0; double sigma_time_gn = 0.0; @@ -90,7 +90,6 @@ LeastSq MinimalLeastSq(const std::vector& n, // Calculate least square fitting parameter for (size_t i = 0; i < n.size(); ++i) { double gn_i = fitting_curve(n[i]); - sigma_gn += gn_i; sigma_gn_squared += gn_i * gn_i; sigma_time += time[i]; sigma_time_gn += time[i] * gn_i; @@ -125,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector& n, // fitting curve. LeastSq MinimalLeastSq(const std::vector& n, const std::vector& time, const BigO complexity) { - CHECK_EQ(n.size(), time.size()); - CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two - // benchmark runs are given - CHECK_NE(complexity, oNone); + BM_CHECK_EQ(n.size(), time.size()); + BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + BM_CHECK_NE(complexity, oNone); LeastSq best_fit; @@ -169,7 +168,8 @@ std::vector ComputeBigO( // Populate the accumulators. for (const Run& run : reports) { - CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + BM_CHECK_GT(run.complexity_n, 0) + << "Did you forget to call SetComplexityN?"; n.push_back(run.complexity_n); real_time.push_back(run.real_accumulated_time / run.iterations); cpu_time.push_back(run.cpu_accumulated_time / run.iterations); @@ -193,11 +193,14 @@ std::vector ComputeBigO( // Get the data from the accumulator to BenchmarkReporter::Run's. Run big_o; big_o.run_name = run_name; + big_o.family_index = reports[0].family_index; + big_o.per_family_instance_index = reports[0].per_family_instance_index; big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; big_o.repetitions = reports[0].repetitions; big_o.repetition_index = Run::no_repetition_index; big_o.threads = reports[0].threads; big_o.aggregate_name = "BigO"; + big_o.aggregate_unit = StatisticUnit::kTime; big_o.report_label = reports[0].report_label; big_o.iterations = 0; big_o.real_accumulated_time = result_real.coef; @@ -215,8 +218,11 @@ std::vector ComputeBigO( // Only add label to mean/stddev if it is same for all runs Run rms; rms.run_name = run_name; + rms.family_index = reports[0].family_index; + rms.per_family_instance_index = reports[0].per_family_instance_index; rms.run_type = BenchmarkReporter::Run::RT_Aggregate; rms.aggregate_name = "RMS"; + rms.aggregate_unit = StatisticUnit::kPercentage; rms.report_label = big_o.report_label; rms.iterations = 0; rms.repetition_index = Run::no_repetition_index; diff --git a/src/console_reporter.cc b/src/console_reporter.cc index 6fd7645..04cc0b7 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -45,7 +45,7 @@ bool ConsoleReporter::ReportContext(const Context& context) { GetErrorStream() << "Color printing is only supported for stdout on windows." " Disabling color printing\n"; - output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color); + output_options_ = static_cast(output_options_ & ~OO_Color); } #endif @@ -53,11 +53,12 @@ bool ConsoleReporter::ReportContext(const Context& context) { } void ConsoleReporter::PrintHeader(const Run& run) { - std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), - "Benchmark", "Time", "CPU", "Iterations"); - if(!run.counters.empty()) { - if(output_options_ & OO_Tabular) { - for(auto const& c : run.counters) { + std::string str = + FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); + if (!run.counters.empty()) { + if (output_options_ & OO_Tabular) { + for (auto const& c : run.counters) { str += FormatString(" %10s", c.first.c_str()); } } else { @@ -97,7 +98,6 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, va_end(args); } - static std::string FormatTime(double time) { // Align decimal places... if (time < 1.0) { @@ -115,8 +115,9 @@ static std::string FormatTime(double time) { void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); - PrinterFn* printer = (output_options_ & OO_Color) ? - (PrinterFn*)ColorPrintf : IgnoreColorPrint; + PrinterFn* printer = (output_options_ & OO_Color) + ? static_cast(ColorPrintf) + : IgnoreColorPrint; auto name_color = (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; printer(Out, name_color, "%-*s ", name_field_width_, @@ -134,18 +135,23 @@ void ConsoleReporter::PrintRunData(const Run& result) { const std::string real_time_str = FormatTime(real_time); const std::string cpu_time_str = FormatTime(cpu_time); - if (result.report_big_o) { std::string big_o = GetBigOString(result.complexity); - printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), - cpu_time, big_o.c_str()); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, + big_o.c_str(), cpu_time, big_o.c_str()); } else if (result.report_rms) { printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", cpu_time * 100, "%"); - } else { + } else if (result.run_type != Run::RT_Aggregate || + result.aggregate_unit == StatisticUnit::kTime) { const char* timeLabel = GetTimeUnitString(result.time_unit); - printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, - cpu_time_str.c_str(), timeLabel); + printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), + timeLabel, cpu_time_str.c_str(), timeLabel); + } else { + assert(result.aggregate_unit == StatisticUnit::kPercentage); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", + (100. * result.real_accumulated_time), "%", + (100. * result.cpu_accumulated_time), "%"); } if (!result.report_big_o && !result.report_rms) { @@ -153,12 +159,19 @@ void ConsoleReporter::PrintRunData(const Run& result) { } for (auto& c : result.counters) { - const std::size_t cNameLen = std::max(std::string::size_type(10), - c.first.length()); - auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); + const std::size_t cNameLen = + std::max(std::string::size_type(10), c.first.length()); + std::string s; const char* unit = ""; - if (c.second.flags & Counter::kIsRate) - unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; + if (result.run_type == Run::RT_Aggregate && + result.aggregate_unit == StatisticUnit::kPercentage) { + s = StrFormat("%.2f", 100. * c.second.value); + unit = "%"; + } else { + s = HumanReadableNumber(c.second.value, c.second.oneK); + if (c.second.flags & Counter::kIsRate) + unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; + } if (output_options_ & OO_Tabular) { printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), unit); diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index af2c18f..1c5e9fa 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "complexity.h" - #include #include #include @@ -22,7 +19,9 @@ #include #include +#include "benchmark/benchmark.h" #include "check.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" @@ -37,13 +36,17 @@ std::vector elements = { "error_occurred", "error_message"}; } // namespace -std::string CsvEscape(const std::string & s) { +std::string CsvEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size() + 2); for (char c : s) { switch (c) { - case '"' : tmp += "\"\""; break; - default : tmp += c; break; + case '"': + tmp += "\"\""; + break; + default: + tmp += c; + break; } } return '"' + tmp + '"'; @@ -85,7 +88,8 @@ void CSVReporter::ReportRuns(const std::vector& reports) { for (const auto& cnt : run.counters) { if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") continue; - CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) + BM_CHECK(user_counter_names_.find(cnt.first) != + user_counter_names_.end()) << "All counters must be present in each run. " << "Counter named \"" << cnt.first << "\" was not in a run after being added to the header"; diff --git a/src/cycleclock.h b/src/cycleclock.h index 179c67c..d65d32a 100644 --- a/src/cycleclock.h +++ b/src/cycleclock.h @@ -36,7 +36,7 @@ // declarations of some other intrinsics, breaking compilation. // Therefore, we simply declare __rdtsc ourselves. See also // http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) +#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif @@ -92,7 +92,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { uint32_t tbl, tbu0, tbu1; asm volatile( "mftbu %0\n" - "mftbl %1\n" + "mftb %1\n" "mftbu %2" : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); tbl &= -static_cast(tbu0 == tbu1); @@ -114,6 +114,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // when I know it will work. Otherwise, I'll use __rdtsc and hope // the code is being compiled with a non-ancient compiler. _asm rdtsc +#elif defined(COMPILER_MSVC) && defined(_M_ARM64) + // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics + // and https://reviews.llvm.org/D53115 + int64_t virtual_timer_value; + virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); + return virtual_timer_value; #elif defined(COMPILER_MSVC) return __rdtsc(); #elif defined(BENCHMARK_OS_NACL) @@ -161,18 +167,27 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__mips__) +#elif defined(__mips__) || defined(__m68k__) // mips apparently only allows rdtsc for superusers, so we fall // back to gettimeofday. It's possible clock_gettime would be better. struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__loongarch__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__s390__) // Covers both s390 and s390x. // Return the CPU clock. uint64_t tsc; +#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) + // z/OS XL compiler HLASM syntax. + asm(" stck %0" : "=m"(tsc) : : "cc"); +#else asm("stck %0" : "=Q"(tsc) : : "cc"); +#endif return tsc; -#elif defined(__riscv) // RISC-V +#elif defined(__riscv) // RISC-V // Use RDCYCLE (and RDCYCLEH on riscv32) #if __riscv_xlen == 32 uint32_t cycles_lo, cycles_hi0, cycles_hi1; @@ -193,6 +208,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm volatile("rdcycle %0" : "=r"(cycles)); return cycles; #endif +#elif defined(__e2k__) || defined(__elbrus__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #else // The soft failover to a generic implementation is automatic only for ARM. // For other platforms the developer is expected to make an attempt to create diff --git a/src/internal_macros.h b/src/internal_macros.h index 6adf00d..91f367b 100644 --- a/src/internal_macros.h +++ b/src/internal_macros.h @@ -13,7 +13,11 @@ #endif #if defined(__clang__) - #if !defined(COMPILER_CLANG) + #if defined(__ibmxl__) + #if !defined(COMPILER_IBMXL) + #define COMPILER_IBMXL + #endif + #elif !defined(COMPILER_CLANG) #define COMPILER_CLANG #endif #elif defined(_MSC_VER) @@ -58,6 +62,8 @@ #define BENCHMARK_OS_NETBSD 1 #elif defined(__OpenBSD__) #define BENCHMARK_OS_OPENBSD 1 +#elif defined(__DragonFly__) + #define BENCHMARK_OS_DRAGONFLY 1 #elif defined(__linux__) #define BENCHMARK_OS_LINUX 1 #elif defined(__native_client__) @@ -72,6 +78,8 @@ #define BENCHMARK_OS_SOLARIS 1 #elif defined(__QNX__) #define BENCHMARK_OS_QNX 1 +#elif defined(__MVS__) +#define BENCHMARK_OS_ZOS 1 #endif #if defined(__ANDROID__) && defined(__GLIBCXX__) diff --git a/src/json_reporter.cc b/src/json_reporter.cc index 959d245..e84a4ed 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "complexity.h" - #include #include #include @@ -25,41 +22,65 @@ #include #include +#include "benchmark/benchmark.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" namespace benchmark { +namespace internal { +extern std::map* global_context; +} namespace { -std::string StrEscape(const std::string & s) { +std::string StrEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size()); for (char c : s) { switch (c) { - case '\b': tmp += "\\b"; break; - case '\f': tmp += "\\f"; break; - case '\n': tmp += "\\n"; break; - case '\r': tmp += "\\r"; break; - case '\t': tmp += "\\t"; break; - case '\\': tmp += "\\\\"; break; - case '"' : tmp += "\\\""; break; - default : tmp += c; break; + case '\b': + tmp += "\\b"; + break; + case '\f': + tmp += "\\f"; + break; + case '\n': + tmp += "\\n"; + break; + case '\r': + tmp += "\\r"; + break; + case '\t': + tmp += "\\t"; + break; + case '\\': + tmp += "\\\\"; + break; + case '"': + tmp += "\\\""; + break; + default: + tmp += c; + break; } } return tmp; } std::string FormatKV(std::string const& key, std::string const& value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, const char* value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, bool value) { - return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); + return StrFormat("\"%s\": %s", StrEscape(key).c_str(), + value ? "true" : "false"); } std::string FormatKV(std::string const& key, int64_t value) { @@ -123,7 +144,9 @@ bool JSONReporter::ReportContext(const Context& context) { RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; if (CPUInfo::Scaling::UNKNOWN != info.scaling) { - out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false) + out << indent + << FormatKV("cpu_scaling_enabled", + info.scaling == CPUInfo::Scaling::ENABLED ? true : false) << ",\n"; } @@ -136,8 +159,8 @@ bool JSONReporter::ReportContext(const Context& context) { out << cache_indent << FormatKV("type", CI.type) << ",\n"; out << cache_indent << FormatKV("level", static_cast(CI.level)) << ",\n"; - out << cache_indent - << FormatKV("size", static_cast(CI.size)) << ",\n"; + out << cache_indent << FormatKV("size", static_cast(CI.size)) + << ",\n"; out << cache_indent << FormatKV("num_sharing", static_cast(CI.num_sharing)) << "\n"; @@ -159,7 +182,16 @@ bool JSONReporter::ReportContext(const Context& context) { #else const char build_type[] = "debug"; #endif - out << indent << FormatKV("library_build_type", build_type) << "\n"; + out << indent << FormatKV("library_build_type", build_type); + + if (internal::global_context != nullptr) { + for (const auto& kv : *internal::global_context) { + out << ",\n"; + out << indent << FormatKV(kv.first, kv.second); + } + } + out << "\n"; + // Close context block and open the list of benchmarks. out << inner_indent << "},\n"; out << inner_indent << "\"benchmarks\": [\n"; @@ -197,6 +229,10 @@ void JSONReporter::PrintRunData(Run const& run) { std::string indent(6, ' '); std::ostream& out = GetOutputStream(); out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; + out << indent << FormatKV("family_index", run.family_index) << ",\n"; + out << indent + << FormatKV("per_family_instance_index", run.per_family_instance_index) + << ",\n"; out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; out << indent << FormatKV("run_type", [&run]() -> const char* { switch (run.run_type) { @@ -215,6 +251,15 @@ void JSONReporter::PrintRunData(Run const& run) { out << indent << FormatKV("threads", run.threads) << ",\n"; if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; + out << indent << FormatKV("aggregate_unit", [&run]() -> const char* { + switch (run.aggregate_unit) { + case StatisticUnit::kTime: + return "time"; + case StatisticUnit::kPercentage: + return "percentage"; + } + BENCHMARK_UNREACHABLE(); + }()) << ",\n"; } if (run.error_occurred) { out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; @@ -222,8 +267,17 @@ void JSONReporter::PrintRunData(Run const& run) { } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; - out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; - out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + if (run.run_type != Run::RT_Aggregate || + run.aggregate_unit == StatisticUnit::kTime) { + out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) + << ",\n"; + out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + } else { + assert(run.aggregate_unit == StatisticUnit::kPercentage); + out << indent << FormatKV("real_time", run.real_accumulated_time) + << ",\n"; + out << indent << FormatKV("cpu_time", run.cpu_accumulated_time); + } out << ",\n" << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); } else if (run.report_big_o) { @@ -241,9 +295,20 @@ void JSONReporter::PrintRunData(Run const& run) { out << ",\n" << indent << FormatKV(c.first, c.second); } - if (run.has_memory_result) { + if (run.memory_result) { + const MemoryManager::Result memory_result = *run.memory_result; out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); - out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used); + out << ",\n" + << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); + + auto report_if_present = [&out, &indent](const char* label, int64_t val) { + if (val != MemoryManager::TombstoneValue) + out << ",\n" << indent << FormatKV(label, val); + }; + + report_if_present("total_allocated_bytes", + memory_result.total_allocated_bytes); + report_if_present("net_heap_growth", memory_result.net_heap_growth); } if (!run.report_label.empty()) { @@ -252,4 +317,7 @@ void JSONReporter::PrintRunData(Run const& run) { out << '\n'; } +const int64_t MemoryManager::TombstoneValue = + std::numeric_limits::max(); + } // end namespace benchmark diff --git a/src/log.h b/src/log.h index 47d0c35..48c071a 100644 --- a/src/log.h +++ b/src/log.h @@ -67,7 +67,7 @@ inline LogType& GetLogInstanceForLevel(int level) { } // end namespace benchmark // clang-format off -#define VLOG(x) \ +#define BM_VLOG(x) \ (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ " ") // clang-format on diff --git a/src/mutex.h b/src/mutex.h index 3fac79a..bec78d9 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -9,60 +9,60 @@ // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) -#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) #else -#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op #endif -#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) -#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) -#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) -#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) #define ACQUIRED_BEFORE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) #define ACQUIRED_AFTER(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) #define REQUIRES(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) #define REQUIRES_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) #define ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) #define ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) #define RELEASE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) #define RELEASE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) #define TRY_ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) #define TRY_ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) -#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) +#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) -#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) #define ASSERT_SHARED_CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x)) + THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) -#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) #define NO_THREAD_SAFETY_ANALYSIS \ - THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) + THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) namespace benchmark { @@ -130,7 +130,7 @@ class Barrier { // entered the barrier. Returns iff this is the last thread to // enter the barrier. bool createBarrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); + BM_CHECK_LT(entered_, running_threads_); entered_++; if (entered_ < running_threads_) { // Wait for all threads to enter diff --git a/src/perf_counters.cc b/src/perf_counters.cc new file mode 100644 index 0000000..b2ac768 --- /dev/null +++ b/src/perf_counters.cc @@ -0,0 +1,132 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "perf_counters.h" + +#include +#include + +#if defined HAVE_LIBPFM +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" +#endif + +namespace benchmark { +namespace internal { + +constexpr size_t PerfCounterValues::kMaxCounters; + +#if defined HAVE_LIBPFM +const bool PerfCounters::kSupported = true; + +bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } + +PerfCounters PerfCounters::Create( + const std::vector& counter_names) { + if (counter_names.empty()) { + return NoCounters(); + } + if (counter_names.size() > PerfCounterValues::kMaxCounters) { + GetErrorLogInstance() + << counter_names.size() + << " counters were requested. The minimum is 1, the maximum is " + << PerfCounterValues::kMaxCounters << "\n"; + return NoCounters(); + } + std::vector counter_ids(counter_names.size()); + + const int mode = PFM_PLM3; // user mode only + for (size_t i = 0; i < counter_names.size(); ++i) { + const bool is_first = i == 0; + struct perf_event_attr attr {}; + attr.size = sizeof(attr); + const int group_id = !is_first ? counter_ids[0] : -1; + const auto& name = counter_names[i]; + if (name.empty()) { + GetErrorLogInstance() << "A counter name was the empty string\n"; + return NoCounters(); + } + pfm_perf_encode_arg_t arg{}; + arg.attr = &attr; + + const int pfm_get = + pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg); + if (pfm_get != PFM_SUCCESS) { + GetErrorLogInstance() << "Unknown counter name: " << name << "\n"; + return NoCounters(); + } + attr.disabled = is_first; + // Note: the man page for perf_event_create suggests inerit = true and + // read_format = PERF_FORMAT_GROUP don't work together, but that's not the + // case. + attr.inherit = true; + attr.pinned = is_first; + attr.exclude_kernel = true; + attr.exclude_user = false; + attr.exclude_hv = true; + // Read all counters in one read. + attr.read_format = PERF_FORMAT_GROUP; + + int id = -1; + static constexpr size_t kNrOfSyscallRetries = 5; + // Retry syscall as it was interrupted often (b/64774091). + for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; + ++num_retries) { + id = perf_event_open(&attr, 0, -1, group_id, 0); + if (id >= 0 || errno != EINTR) { + break; + } + } + if (id < 0) { + GetErrorLogInstance() + << "Failed to get a file descriptor for " << name << "\n"; + return NoCounters(); + } + + counter_ids[i] = id; + } + if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) { + GetErrorLogInstance() << "Failed to start counters\n"; + return NoCounters(); + } + + return PerfCounters(counter_names, std::move(counter_ids)); +} + +PerfCounters::~PerfCounters() { + if (counter_ids_.empty()) { + return; + } + ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE); + for (int fd : counter_ids_) { + close(fd); + } +} +#else // defined HAVE_LIBPFM +const bool PerfCounters::kSupported = false; + +bool PerfCounters::Initialize() { return false; } + +PerfCounters PerfCounters::Create( + const std::vector& counter_names) { + if (!counter_names.empty()) { + GetErrorLogInstance() << "Performance counters not supported."; + } + return NoCounters(); +} + +PerfCounters::~PerfCounters() = default; +#endif // defined HAVE_LIBPFM +} // namespace internal +} // namespace benchmark diff --git a/src/perf_counters.h b/src/perf_counters.h new file mode 100644 index 0000000..47ca138 --- /dev/null +++ b/src/perf_counters.h @@ -0,0 +1,172 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_PERF_COUNTERS_H +#define BENCHMARK_PERF_COUNTERS_H + +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "check.h" +#include "log.h" + +#ifndef BENCHMARK_OS_WINDOWS +#include +#endif + +namespace benchmark { +namespace internal { + +// Typically, we can only read a small number of counters. There is also a +// padding preceding counter values, when reading multiple counters with one +// syscall (which is desirable). PerfCounterValues abstracts these details. +// The implementation ensures the storage is inlined, and allows 0-based +// indexing into the counter values. +// The object is used in conjunction with a PerfCounters object, by passing it +// to Snapshot(). The values are populated such that +// perfCounters->names()[i]'s value is obtained at position i (as given by +// operator[]) of this object. +class PerfCounterValues { + public: + explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { + BM_CHECK_LE(nr_counters_, kMaxCounters); + } + + uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } + + static constexpr size_t kMaxCounters = 3; + + private: + friend class PerfCounters; + // Get the byte buffer in which perf counters can be captured. + // This is used by PerfCounters::Read + std::pair get_data_buffer() { + return {reinterpret_cast(values_.data()), + sizeof(uint64_t) * (kPadding + nr_counters_)}; + } + + static constexpr size_t kPadding = 1; + std::array values_; + const size_t nr_counters_; +}; + +// Collect PMU counters. The object, once constructed, is ready to be used by +// calling read(). PMU counter collection is enabled from the time create() is +// called, to obtain the object, until the object's destructor is called. +class PerfCounters final { + public: + // True iff this platform supports performance counters. + static const bool kSupported; + + bool IsValid() const { return is_valid_; } + static PerfCounters NoCounters() { return PerfCounters(); } + + ~PerfCounters(); + PerfCounters(PerfCounters&&) = default; + PerfCounters(const PerfCounters&) = delete; + + // Platform-specific implementations may choose to do some library + // initialization here. + static bool Initialize(); + + // Return a PerfCounters object ready to read the counters with the names + // specified. The values are user-mode only. The counter name format is + // implementation and OS specific. + // TODO: once we move to C++-17, this should be a std::optional, and then the + // IsValid() boolean can be dropped. + static PerfCounters Create(const std::vector& counter_names); + + // Take a snapshot of the current value of the counters into the provided + // valid PerfCounterValues storage. The values are populated such that: + // names()[i]'s value is (*values)[i] + BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { +#ifndef BENCHMARK_OS_WINDOWS + assert(values != nullptr); + assert(IsValid()); + auto buffer = values->get_data_buffer(); + auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); + return static_cast(read_bytes) == buffer.second; +#else + (void)values; + return false; +#endif + } + + const std::vector& names() const { return counter_names_; } + size_t num_counters() const { return counter_names_.size(); } + + private: + PerfCounters(const std::vector& counter_names, + std::vector&& counter_ids) + : counter_ids_(std::move(counter_ids)), + counter_names_(counter_names), + is_valid_(true) {} + PerfCounters() : is_valid_(false) {} + + std::vector counter_ids_; + const std::vector counter_names_; + const bool is_valid_; +}; + +// Typical usage of the above primitives. +class PerfCountersMeasurement final { + public: + PerfCountersMeasurement(PerfCounters&& c) + : counters_(std::move(c)), + start_values_(counters_.IsValid() ? counters_.names().size() : 0), + end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} + + bool IsValid() const { return counters_.IsValid(); } + + BENCHMARK_ALWAYS_INLINE void Start() { + assert(IsValid()); + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + counters_.Snapshot(&start_values_); + ClobberMemory(); + } + + BENCHMARK_ALWAYS_INLINE std::vector> + StopAndGetMeasurements() { + assert(IsValid()); + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + counters_.Snapshot(&end_values_); + ClobberMemory(); + + std::vector> ret; + for (size_t i = 0; i < counters_.names().size(); ++i) { + double measurement = static_cast(end_values_[i]) - + static_cast(start_values_[i]); + ret.push_back({counters_.names()[i], measurement}); + } + return ret; + } + + private: + PerfCounters counters_; + PerfCounterValues start_values_; + PerfCounterValues end_values_; +}; + +BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize(); + +} // namespace internal +} // namespace benchmark + +#endif // BENCHMARK_PERF_COUNTERS_H diff --git a/src/re.h b/src/re.h index fbe2503..6300467 100644 --- a/src/re.h +++ b/src/re.h @@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) { // regerror returns the number of bytes necessary to null terminate // the string, so we move that when assigning to error. - CHECK_NE(needed, 0); + BM_CHECK_NE(needed, 0); error->assign(errbuf, needed - 1); delete[] errbuf; diff --git a/src/reporter.cc b/src/reporter.cc index 337575a..1d2df17 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -12,19 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "timers.h" - #include - #include +#include +#include #include #include +#include "benchmark/benchmark.h" #include "check.h" #include "string_util.h" +#include "timers.h" namespace benchmark { +namespace internal { +extern std::map *global_context; +} BenchmarkReporter::BenchmarkReporter() : output_stream_(&std::cout), error_stream_(&std::cerr) {} @@ -33,7 +36,7 @@ BenchmarkReporter::~BenchmarkReporter() {} void BenchmarkReporter::PrintBasicContext(std::ostream *out, Context const &context) { - CHECK(out) << "cannot be null"; + BM_CHECK(out) << "cannot be null"; auto &Out = *out; Out << LocalDateTimeString() << "\n"; @@ -64,6 +67,12 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, Out << "\n"; } + if (internal::global_context != nullptr) { + for (const auto &kv : *internal::global_context) { + Out << kv.first << ": " << kv.second << "\n"; + } + } + if (CPUInfo::Scaling::ENABLED == info.scaling) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " diff --git a/src/sleep.cc b/src/sleep.cc index 1512ac9..ab59000 100644 --- a/src/sleep.cc +++ b/src/sleep.cc @@ -24,6 +24,10 @@ #include #endif +#ifdef BENCHMARK_OS_ZOS +#include +#endif + namespace benchmark { #ifdef BENCHMARK_OS_WINDOWS // Window's Sleep takes milliseconds argument. @@ -31,13 +35,24 @@ void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } void SleepForSeconds(double seconds) { SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); } -#else // BENCHMARK_OS_WINDOWS +#else // BENCHMARK_OS_WINDOWS void SleepForMicroseconds(int microseconds) { +#ifdef BENCHMARK_OS_ZOS + // z/OS does not support nanosleep. Instead call sleep() and then usleep() to + // sleep for the remaining microseconds because usleep() will fail if its + // argument is greater than 1000000. + div_t sleepTime = div(microseconds, kNumMicrosPerSecond); + int seconds = sleepTime.quot; + while (seconds != 0) seconds = sleep(seconds); + while (usleep(sleepTime.rem) == -1 && errno == EINTR) + ; +#else struct timespec sleep_time; sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) ; // Ignore signals and wait for the full interval to elapse. +#endif } void SleepForMilliseconds(int milliseconds) { diff --git a/src/statistics.cc b/src/statistics.cc index bd5a3d6..3e5ef09 100644 --- a/src/statistics.cc +++ b/src/statistics.cc @@ -13,15 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" +#include "statistics.h" #include #include #include #include #include + +#include "benchmark/benchmark.h" #include "check.h" -#include "statistics.h" namespace benchmark { @@ -74,6 +75,15 @@ double StatisticsStdDev(const std::vector& v) { return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); } +double StatisticsCV(const std::vector& v) { + if (v.size() < 2) return 0.0; + + const auto stddev = StatisticsStdDev(v); + const auto mean = StatisticsMean(v); + + return stddev / mean; +} + std::vector ComputeStats( const std::vector& reports) { typedef BenchmarkReporter::Run Run; @@ -112,22 +122,22 @@ std::vector ComputeStats( it = counter_stats.find(cnt.first); it->second.s.reserve(reports.size()); } else { - CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); } } } // Populate the accumulators. for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); - CHECK_EQ(run_iterations, run.iterations); + BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); + BM_CHECK_EQ(run_iterations, run.iterations); if (run.error_occurred) continue; real_accumulated_time_stat.emplace_back(run.real_accumulated_time); cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); // user counters for (auto const& cnt : run.counters) { auto it = counter_stats.find(cnt.first); - CHECK_NE(it, counter_stats.end()); + BM_CHECK_NE(it, counter_stats.end()); it->second.s.emplace_back(cnt.second); } } @@ -148,11 +158,14 @@ std::vector ComputeStats( // Get the data from the accumulator to BenchmarkReporter::Run's. Run data; data.run_name = reports[0].run_name; + data.family_index = reports[0].family_index; + data.per_family_instance_index = reports[0].per_family_instance_index; data.run_type = BenchmarkReporter::Run::RT_Aggregate; data.threads = reports[0].threads; data.repetitions = reports[0].repetitions; data.repetition_index = Run::no_repetition_index; data.aggregate_name = Stat.name_; + data.aggregate_unit = Stat.unit_; data.report_label = report_label; // It is incorrect to say that an aggregate is computed over @@ -165,13 +178,15 @@ std::vector ComputeStats( data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); - // We will divide these times by data.iterations when reporting, but the - // data.iterations is not nessesairly the scale of these measurements, - // because in each repetition, these timers are sum over all the iterations. - // And if we want to say that the stats are over N repetitions and not - // M iterations, we need to multiply these by (N/M). - data.real_accumulated_time *= iteration_rescale_factor; - data.cpu_accumulated_time *= iteration_rescale_factor; + if (data.aggregate_unit == StatisticUnit::kTime) { + // We will divide these times by data.iterations when reporting, but the + // data.iterations is not necessarily the scale of these measurements, + // because in each repetition, these timers are sum over all the iters. + // And if we want to say that the stats are over N repetitions and not + // M iterations, we need to multiply these by (N/M). + data.real_accumulated_time *= iteration_rescale_factor; + data.cpu_accumulated_time *= iteration_rescale_factor; + } data.time_unit = reports[0].time_unit; diff --git a/src/statistics.h b/src/statistics.h index 7eccc85..a9545a5 100644 --- a/src/statistics.h +++ b/src/statistics.h @@ -31,6 +31,7 @@ std::vector ComputeStats( double StatisticsMean(const std::vector& v); double StatisticsMedian(const std::vector& v); double StatisticsStdDev(const std::vector& v); +double StatisticsCV(const std::vector& v); } // end namespace benchmark diff --git a/src/string_util.cc b/src/string_util.cc index ac60b55..401fa13 100644 --- a/src/string_util.cc +++ b/src/string_util.cc @@ -151,7 +151,7 @@ std::string StrFormatImp(const char* msg, va_list args) { auto buff_ptr = std::unique_ptr(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - ret = vsnprintf(buff_ptr.get(), size, msg, args); + vsnprintf(buff_ptr.get(), size, msg, args); return std::string(buff_ptr.get()); } @@ -163,6 +163,19 @@ std::string StrFormat(const char* format, ...) { return tmp; } +std::vector StrSplit(const std::string& str, char delim) { + if (str.empty()) return {}; + std::vector ret; + size_t first = 0; + size_t next = str.find(delim); + for (; next != std::string::npos; + first = next + 1, next = str.find(delim, first)) { + ret.push_back(str.substr(first, next - first)); + } + ret.push_back(str.substr(first)); + return ret; +} + #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including @@ -185,11 +198,10 @@ unsigned long stoul(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtoulErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of unsigned long"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of unsigned long"); } else if (strEnd == strStart || strtoulErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); @@ -212,11 +224,10 @@ int stoi(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtolErrno == ERANGE || long(int(result)) != result) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtolErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); @@ -239,11 +250,10 @@ double stod(const std::string& str, size_t* pos) { /* Check for errors and return */ if (strtodErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtodErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); diff --git a/src/string_util.h b/src/string_util.h index 09d7b4b..ff3b7da 100644 --- a/src/string_util.h +++ b/src/string_util.h @@ -4,6 +4,7 @@ #include #include #include + #include "internal_macros.h" namespace benchmark { @@ -37,6 +38,10 @@ inline std::string StrCat(Args&&... args) { return ss.str(); } +std::vector StrSplit(const std::string& str, char delim); + +// Disable lint checking for this block since it re-implements C functions. +// NOLINTBEGIN #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including @@ -45,14 +50,15 @@ inline std::string StrCat(Args&&... args) { * namespace, not std:: namespace. */ unsigned long stoul(const std::string& str, size_t* pos = nullptr, - int base = 10); + int base = 10); int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); double stod(const std::string& str, size_t* pos = nullptr); #else -using std::stoul; -using std::stoi; -using std::stod; +using std::stod; // NOLINT(misc-unused-using-decls) +using std::stoi; // NOLINT(misc-unused-using-decls) +using std::stoul; // NOLINT(misc-unused-using-decls) #endif +// NOLINTEND } // end namespace benchmark diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 8bab932..87dcfb4 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -19,6 +19,7 @@ #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA #include #include + #include #else #include @@ -29,7 +30,8 @@ #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ - defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD + defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ + defined BENCHMARK_OS_DRAGONFLY #define BENCHMARK_HAS_SYSCTL #include #endif @@ -54,9 +56,9 @@ #include #include #include +#include #include #include -#include #include #include "check.h" @@ -134,7 +136,7 @@ struct ValueUnion { template std::array GetAsArray() { const int ArrSize = sizeof(T) * N; - CHECK_LE(ArrSize, Size); + BM_CHECK_LE(ArrSize, Size); std::array Arr; std::memcpy(Arr.data(), data(), ArrSize); return Arr; @@ -146,7 +148,7 @@ ValueUnion GetSysctlImp(std::string const& Name) { int mib[2]; mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ + if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); if (Name == "hw.ncpu") { @@ -213,10 +215,9 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) { CPUInfo::Scaling CpuScaling(int num_cpus) { // We don't have a valid CPU count, so don't even bother. if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; -#ifdef BENCHMARK_OS_QNX +#if defined(BENCHMARK_OS_QNX) return CPUInfo::Scaling::UNKNOWN; -#endif -#ifndef BENCHMARK_OS_WINDOWS +#elif !defined(BENCHMARK_OS_WINDOWS) // On Linux, the CPUfreq subsystem exposes CPU information as files on the // local file system. If reading the exported files fails, then we may not be // running on Linux, so we silently ignore all the read errors. @@ -224,11 +225,13 @@ CPUInfo::Scaling CpuScaling(int num_cpus) { for (int cpu = 0; cpu < num_cpus; ++cpu) { std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; + if (ReadFromFile(governor_file, &res) && res != "performance") + return CPUInfo::Scaling::ENABLED; } return CPUInfo::Scaling::DISABLED; -#endif +#else return CPUInfo::Scaling::UNKNOWN; +#endif } int CountSetBitsInCPUMap(std::string Val) { @@ -343,6 +346,7 @@ std::vector GetCacheSizesWindows() { C.num_sharing = static_cast(B.count()); C.level = Cache->Level; C.size = Cache->Size; + C.type = "Unknown"; switch (Cache->Type) { case CacheUnified: C.type = "Unified"; @@ -356,9 +360,6 @@ std::vector GetCacheSizesWindows() { case CacheTrace: C.type = "Trace"; break; - default: - C.type = "Unknown"; - break; } res.push_back(C); } @@ -367,29 +368,29 @@ std::vector GetCacheSizesWindows() { #elif BENCHMARK_OS_QNX std::vector GetCacheSizesQNX() { std::vector res; - struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr); + struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); - int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ; - for(int i = 0; i < num; ++i ) { + int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; + for (int i = 0; i < num; ++i) { CPUInfo::CacheInfo info; - switch (cache->flags){ - case CACHE_FLAG_INSTR : + switch (cache->flags) { + case CACHE_FLAG_INSTR: info.type = "Instruction"; info.level = 1; break; - case CACHE_FLAG_DATA : + case CACHE_FLAG_DATA: info.type = "Data"; info.level = 1; break; - case CACHE_FLAG_UNIFIED : + case CACHE_FLAG_UNIFIED: info.type = "Unified"; info.level = 2; break; - case CACHE_FLAG_SHARED : + case CACHE_FLAG_SHARED: info.type = "Shared"; info.level = 3; break; - default : + default: continue; break; } @@ -417,24 +418,23 @@ std::vector GetCacheSizes() { std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1; - TCHAR hostname[COUNT] = {'\0'}; + const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; + TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) - return std::string(""); + if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - //Using wstring_convert, Is deprecated in C++17 + // Using wstring_convert, Is deprecated in C++17 using convert_type = std::codecvt_utf8; std::wstring_convert converter; std::wstring wStr(hostname, DWCOUNT); str = converter.to_bytes(wStr); #endif return str; -#else // defined(BENCHMARK_OS_WINDOWS) +#else // defined(BENCHMARK_OS_WINDOWS) #ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 @@ -443,15 +443,15 @@ std::string GetSystemName() { #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 #else -#warning "HOST_NAME_MAX not defined. using 64" +#pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 #endif -#endif // def HOST_NAME_MAX +#endif // def HOST_NAME_MAX char hostname[HOST_NAME_MAX]; int retVal = gethostname(hostname, HOST_NAME_MAX); if (retVal != 0) return std::string(""); return std::string(hostname); -#endif // Catch-all POSIX block. +#endif // Catch-all POSIX block. } int GetNumCPUs() { @@ -473,8 +473,7 @@ int GetNumCPUs() { // Returns -1 in case of a failure. int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); if (NumCPU < 0) { - fprintf(stderr, - "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", + fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } return NumCPU; @@ -497,7 +496,8 @@ int GetNumCPUs() { #if defined(__s390__) // s390 has another format in /proc/cpuinfo // it needs to be parsed differently - if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1); + if (SplitIdx != std::string::npos) + value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1); #else if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); #endif @@ -529,7 +529,11 @@ int GetNumCPUs() { BENCHMARK_UNREACHABLE(); } -double GetCPUCyclesPerSecond() { +double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { + // Currently, scaling is only used on linux path here, + // suppress diagnostics about it being unused on other paths. + (void)scaling; + #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN long freq; @@ -540,8 +544,15 @@ double GetCPUCyclesPerSecond() { // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) - // If CPU scaling is in effect, we want to use the *maximum* frequency, - // not whatever CPU speed some random processor happens to be using now. + // If CPU scaling is disabled, use the *current* frequency. + // Note that we specifically don't want to read cpuinfo_cur_freq, + // because it is only readable by root. + || (scaling == CPUInfo::Scaling::DISABLED && + ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", + &freq)) + // Otherwise, if CPU scaling may be in effect, we want to use + // the *maximum* frequency, not whatever CPU speed some random processor + // happens to be using now. || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &freq)) { // The value is in kHz (as the file name suggests). For example, on a @@ -607,6 +618,8 @@ double GetCPUCyclesPerSecond() { "machdep.tsc_freq"; #elif defined BENCHMARK_OS_OPENBSD "hw.cpuspeed"; +#elif defined BENCHMARK_OS_DRAGONFLY + "hw.tsc_frequency"; #else "hw.cpufrequency"; #endif @@ -630,13 +643,13 @@ double GetCPUCyclesPerSecond() { "~MHz", nullptr, &data, &data_size))) return static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz -#elif defined (BENCHMARK_OS_SOLARIS) - kstat_ctl_t *kc = kstat_open(); +#elif defined(BENCHMARK_OS_SOLARIS) + kstat_ctl_t* kc = kstat_open(); if (!kc) { std::cerr << "failed to open /dev/kstat\n"; return -1; } - kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; @@ -645,7 +658,7 @@ double GetCPUCyclesPerSecond() { std::cerr << "failed to read from /dev/kstat\n"; return -1; } - kstat_named_t *knp = + kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; @@ -659,7 +672,7 @@ double GetCPUCyclesPerSecond() { double clock_hz = knp->value.ui64; kstat_close(kc); return clock_hz; -#elif defined (BENCHMARK_OS_QNX) +#elif defined(BENCHMARK_OS_QNX) return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); #endif @@ -671,9 +684,10 @@ double GetCPUCyclesPerSecond() { } std::vector GetLoadAvg() { -#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ - defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ - defined BENCHMARK_OS_OPENBSD) && !defined(__ANDROID__) +#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ + defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ + defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ + !defined(__ANDROID__) constexpr int kMaxSamples = 3; std::vector res(kMaxSamples, 0.0); const int nelem = getloadavg(res.data(), kMaxSamples); @@ -697,12 +711,11 @@ const CPUInfo& CPUInfo::Get() { CPUInfo::CPUInfo() : num_cpus(GetNumCPUs()), - cycles_per_second(GetCPUCyclesPerSecond()), - caches(GetCacheSizes()), scaling(CpuScaling(num_cpus)), + cycles_per_second(GetCPUCyclesPerSecond(scaling)), + caches(GetCacheSizes()), load_avg(GetLoadAvg()) {} - const SystemInfo& SystemInfo::Get() { static const SystemInfo* info = new SystemInfo(); return *info; diff --git a/src/thread_manager.h b/src/thread_manager.h index 28e2dd5..4680285 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -36,7 +36,6 @@ class ThreadManager { [this]() { return alive_threads_ == 0; }); } - public: struct Result { IterationCount iterations = 0; double real_time_used = 0; diff --git a/src/thread_timer.h b/src/thread_timer.h index 1703ca0..eb23f59 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -28,7 +28,7 @@ class ThreadTimer { // Called by each thread void StopTimer() { - CHECK(running_); + BM_CHECK(running_); running_ = false; real_time_used_ += ChronoClockNow() - start_real_time_; // Floating point error can result in the subtraction producing a negative @@ -44,19 +44,19 @@ class ThreadTimer { // REQUIRES: timer is not running double real_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running double cpu_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running double manual_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return manual_time_used_; } diff --git a/src/timers.cc b/src/timers.cc index 4f76edd..21d3db2 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "timers.h" + #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -28,7 +29,8 @@ #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \ + defined BENCHMARK_OS_MACOSX #include #endif #if defined(BENCHMARK_OS_MACOSX) @@ -124,8 +126,8 @@ double ProcessCPUUsage() { // syncronous system calls in Emscripten. return emscripten_get_now() * 1e-3; #elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. + // See https://github.com/google/benchmark/pull/292 struct timespec spec; if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) return MakeTime(spec); @@ -148,13 +150,14 @@ double ThreadCPUUsage() { &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. + // See https://github.com/google/benchmark/pull/292 mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; mach_port_t thread = pthread_mach_thread_np(pthread_self()); - if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == - KERN_SUCCESS) { + if (thread_info(thread, THREAD_BASIC_INFO, + reinterpret_cast(&info), + &count) == KERN_SUCCESS) { return MakeTime(info); } DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); @@ -189,15 +192,26 @@ std::string LocalDateTimeString() { std::size_t timestamp_len; long int offset_minutes; char tz_offset_sign = '+'; - // Long enough buffers to avoid format-overflow warnings - char tz_offset[128]; + // tz_offset is set in one of three ways: + // * strftime with %z - This either returns empty or the ISO 8601 time. The + // maximum length an + // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). + // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to + // 19 for %02li, + // one for :, up to 19 %02li, plus trailing zero). + // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus + // trailing zero). + // + // Thus, the maximum size this needs to be is 41. + char tz_offset[41]; + // Long enough buffer to avoid format-overflow warnings char storage[128]; #if defined(BENCHMARK_OS_WINDOWS) - std::tm *timeinfo_p = ::localtime(&now); + std::tm* timeinfo_p = ::localtime(&now); #else std::tm timeinfo; - std::tm *timeinfo_p = &timeinfo; + std::tm* timeinfo_p = &timeinfo; ::localtime_r(&now, &timeinfo); #endif @@ -214,10 +228,11 @@ std::string LocalDateTimeString() { tz_offset_sign = '-'; } - tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", - tz_offset_sign, offset_minutes / 100, offset_minutes % 100); - CHECK(tz_len == kTzOffsetLen); - ((void)tz_len); // Prevent unused variable warning in optimized build. + tz_len = + ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", + tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + BM_CHECK(tz_len == kTzOffsetLen); + ((void)tz_len); // Prevent unused variable warning in optimized build. } else { // Unknown offset. RFC3339 specifies that unknown local offsets should be // written as UTC time with -00:00 timezone. @@ -231,9 +246,9 @@ std::string LocalDateTimeString() { strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); } - timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", - timeinfo_p); - CHECK(timestamp_len == kTimestampLen); + timestamp_len = + std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p); + BM_CHECK(timestamp_len == kTimestampLen); // Prevent unused variable warning in optimized build. ((void)kTimestampLen); diff --git a/test/BUILD b/test/BUILD index 9bb8cb0..df700a7 100644 --- a/test/BUILD +++ b/test/BUILD @@ -20,6 +20,8 @@ TEST_ARGS = ["--benchmark_min_time=0.01"] PER_SRC_TEST_ARGS = ({ "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], + "repetitions_test.cc": [" --benchmark_repetitions=3"], + "spec_arg_test.cc" : ["--benchmark_filter=BM_NotChosen"], }) load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c1a3a3f..162af53 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -56,6 +56,12 @@ endmacro(compile_output_test) compile_benchmark_test(benchmark_test) add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) +compile_benchmark_test(spec_arg_test) +add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) + +compile_benchmark_test(benchmark_setup_teardown_test) +add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) + compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) @@ -87,6 +93,9 @@ add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01) compile_benchmark_test(basic_test) add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01) +compile_output_test(repetitions_test) +add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3) + compile_benchmark_test(diagnostics_test) add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01) @@ -128,6 +137,9 @@ add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_ compile_output_test(user_counters_test) add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01) +compile_output_test(perf_counters_test) +add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES) + compile_output_test(internal_threading_test) add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01) @@ -193,9 +205,11 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) add_gtest(benchmark_gtest) add_gtest(benchmark_name_gtest) + add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) + add_gtest(perf_counters_gtest) endif(BENCHMARK_ENABLE_GTEST_TESTS) ############################################################################### diff --git a/test/args_product_test.cc b/test/args_product_test.cc index 8a859f8..d44f391 100644 --- a/test/args_product_test.cc +++ b/test/args_product_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include #include #include #include +#include "benchmark/benchmark.h" + class ArgsProductFixture : public ::benchmark::Fixture { public: ArgsProductFixture() @@ -23,7 +23,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { {2, 15, 10, 9}, {4, 5, 6, 11}}) {} - void SetUp(const ::benchmark::State& state) { + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { std::vector ranges = {state.range(0), state.range(1), state.range(2), state.range(3)}; @@ -37,7 +37,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { virtual ~ArgsProductFixture() { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -45,7 +45,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/test/basic_test.cc b/test/basic_test.cc index 5f3dd1a..3a8fd42 100644 --- a/test/basic_test.cc +++ b/test/basic_test.cc @@ -13,7 +13,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu(); void BM_spin_empty(benchmark::State& state) { for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { + for (auto x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } @@ -22,11 +22,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty); BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); void BM_spin_pause_before(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -37,11 +37,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); void BM_spin_pause_during(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } state.ResumeTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -62,11 +62,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -74,15 +74,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after); BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); void BM_spin_pause_before_and_after(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -96,7 +96,6 @@ void BM_empty_stop_start(benchmark::State& state) { BENCHMARK(BM_empty_stop_start); BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); - void BM_KeepRunning(benchmark::State& state) { benchmark::IterationCount iter_count = 0; assert(iter_count == state.iterations()); @@ -108,15 +107,30 @@ void BM_KeepRunning(benchmark::State& state) { BENCHMARK(BM_KeepRunning); void BM_KeepRunningBatch(benchmark::State& state) { - // Choose a prime batch size to avoid evenly dividing max_iterations. - const benchmark::IterationCount batch_size = 101; + // Choose a batch size >1000 to skip the typical runs with iteration + // targets of 10, 100 and 1000. If these are not actually skipped the + // bug would be detectable as consecutive runs with the same iteration + // count. Below we assert that this does not happen. + const benchmark::IterationCount batch_size = 1009; + + static benchmark::IterationCount prior_iter_count = 0; benchmark::IterationCount iter_count = 0; while (state.KeepRunningBatch(batch_size)) { iter_count += batch_size; } assert(state.iterations() == iter_count); + + // Verify that the iteration count always increases across runs (see + // comment above). + assert(iter_count == batch_size // max_iterations == 1 + || iter_count > prior_iter_count); // max_iterations > batch_size + prior_iter_count = iter_count; } -BENCHMARK(BM_KeepRunningBatch); +// Register with a fixed repetition count to establish the invariant that +// the iteration count should always change across runs. This overrides +// the --benchmark_repetitions command line flag, which would otherwise +// cause this test to fail if set > 1. +BENCHMARK(BM_KeepRunningBatch)->Repetitions(1); void BM_RangedFor(benchmark::State& state) { benchmark::IterationCount iter_count = 0; @@ -127,10 +141,39 @@ void BM_RangedFor(benchmark::State& state) { } BENCHMARK(BM_RangedFor); +#ifdef BENCHMARK_HAS_CXX11 +template +void BM_OneTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + T sum = 0; + for (auto _ : state) { + sum += arg; + } +} +BENCHMARK(BM_OneTemplateFunc)->Arg(1); +BENCHMARK(BM_OneTemplateFunc)->Arg(1); + +template +void BM_TwoTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + A sum = 0; + B prod = 1; + for (auto _ : state) { + sum += arg; + prod *= arg; + } +} +BENCHMARK(BM_TwoTemplateFunc)->Arg(1); +BENCHMARK(BM_TwoTemplateFunc)->Arg(1); + +#endif // BENCHMARK_HAS_CXX11 + // Ensure that StateIterator provides all the necessary typedefs required to // instantiate std::iterator_traits. -static_assert(std::is_same< - typename std::iterator_traits::value_type, - typename benchmark::State::StateIterator::value_type>::value, ""); +static_assert( + std::is_same::value_type, + typename benchmark::State::StateIterator::value_type>::value, + ""); BENCHMARK_MAIN(); diff --git a/test/benchmark_gtest.cc b/test/benchmark_gtest.cc index 9557b20..14a885b 100644 --- a/test/benchmark_gtest.cc +++ b/test/benchmark_gtest.cc @@ -1,3 +1,5 @@ +#include +#include #include #include "../src/benchmark_register.h" @@ -6,6 +8,8 @@ namespace benchmark { namespace internal { +extern std::map* global_context; + namespace { TEST(AddRangeTest, Simple) { @@ -90,6 +94,12 @@ TEST(AddRangeTest, ZeroOnlyRange) { EXPECT_THAT(dst, testing::ElementsAre(0)); } +TEST(AddRangeTest, ZeroStartingRange) { + std::vector dst; + AddRange(&dst, 0, 2, 2); + EXPECT_THAT(dst, testing::ElementsAre(0, 1, 2)); +} + TEST(AddRangeTest, NegativeRange64) { std::vector dst; AddRange(&dst, -4, 4, 2); @@ -123,6 +133,33 @@ TEST(AddRangeTest, Simple8) { EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); } +TEST(AddCustomContext, Simple) { + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("baz", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + + delete global_context; + global_context = nullptr; +} + +TEST(AddCustomContext, DuplicateKey) { + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("foo", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"))); + + delete global_context; + global_context = nullptr; +} + } // namespace } // namespace internal } // namespace benchmark diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc new file mode 100644 index 0000000..d04befa --- /dev/null +++ b/test/benchmark_random_interleaving_gtest.cc @@ -0,0 +1,127 @@ +#include +#include +#include + +#include "../src/commandlineflags.h" +#include "../src/string_util.h" +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace benchmark { + +BM_DECLARE_bool(benchmark_enable_random_interleaving); +BM_DECLARE_string(benchmark_filter); +BM_DECLARE_int32(benchmark_repetitions); + +namespace internal { +namespace { + +class EventQueue : public std::queue { + public: + void Put(const std::string& event) { push(event); } + + void Clear() { + while (!empty()) { + pop(); + } + } + + std::string Get() { + std::string event = front(); + pop(); + return event; + } +}; + +EventQueue* queue = new EventQueue(); + +class NullReporter : public BenchmarkReporter { + public: + bool ReportContext(const Context& /*context*/) override { return true; } + void ReportRuns(const std::vector& /* report */) override {} +}; + +class BenchmarkTest : public testing::Test { + public: + static void SetupHook(int /* num_threads */) { queue->push("Setup"); } + + static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } + + void Execute(const std::string& pattern) { + queue->Clear(); + + BenchmarkReporter* reporter = new NullReporter; + FLAGS_benchmark_filter = pattern; + RunSpecifiedBenchmarks(reporter); + delete reporter; + + queue->Put("DONE"); // End marker + } +}; + +void BM_Match1(benchmark::State& state) { + const int64_t arg = state.range(0); + + for (auto _ : state) { + } + queue->Put(StrFormat("BM_Match1/%d", static_cast(arg))); +} +BENCHMARK(BM_Match1) + ->Iterations(100) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Range(10, 80) + ->Args({90}) + ->Args({100}); + +TEST_F(BenchmarkTest, Match1) { + Execute("BM_Match1"); + ASSERT_EQ("BM_Match1/1", queue->Get()); + ASSERT_EQ("BM_Match1/2", queue->Get()); + ASSERT_EQ("BM_Match1/3", queue->Get()); + ASSERT_EQ("BM_Match1/10", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/90", queue->Get()); + ASSERT_EQ("BM_Match1/100", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRepetition) { + FLAGS_benchmark_repetitions = 2; + + Execute("BM_Match1/(64|80)"); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { + FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + + std::map element_count; + std::map interleaving_count; + Execute("BM_Match1/(64|80)"); + for (int i = 0; i < 100; ++i) { + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + element_count[interleaving[0]]++; + element_count[interleaving[1]]++; + interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), + interleaving[1].c_str())]++; + } + EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions."; + EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions."; + EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; + ASSERT_EQ("DONE", queue->Get()); +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/test/benchmark_setup_teardown_test.cc b/test/benchmark_setup_teardown_test.cc new file mode 100644 index 0000000..efa34e1 --- /dev/null +++ b/test/benchmark_setup_teardown_test.cc @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Test that Setup() and Teardown() are called exactly once +// for each benchmark run (single-threaded). +namespace single { +static int setup_call = 0; +static int teardown_call = 0; +} // namespace single +static void DoSetup1(const benchmark::State& state) { + ++single::setup_call; + + // Setup/Teardown should never be called with any thread_idx != 0. + assert(state.thread_index() == 0); +} + +static void DoTeardown1(const benchmark::State& state) { + ++single::teardown_call; + assert(state.thread_index() == 0); +} + +static void BM_with_setup(benchmark::State& state) { + for (auto s : state) { + } +} +BENCHMARK(BM_with_setup) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Iterations(100) + ->Setup(DoSetup1) + ->Teardown(DoTeardown1); + +// Test that Setup() and Teardown() are called once for each group of threads. +namespace concurrent { +static std::atomic setup_call(0); +static std::atomic teardown_call(0); +static std::atomic func_call(0); +} // namespace concurrent + +static void DoSetup2(const benchmark::State& state) { + concurrent::setup_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void DoTeardown2(const benchmark::State& state) { + concurrent::teardown_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void BM_concurrent(benchmark::State& state) { + for (auto s : state) { + } + concurrent::func_call.fetch_add(1, std::memory_order_acquire); +} + +BENCHMARK(BM_concurrent) + ->Setup(DoSetup2) + ->Teardown(DoTeardown2) + ->Iterations(100) + ->Threads(5) + ->Threads(10) + ->Threads(15); + +// Testing interaction with Fixture::Setup/Teardown +namespace fixture_interaction { +int setup = 0; +int fixture_setup = 0; +} // namespace fixture_interaction + +#define FIXTURE_BECHMARK_NAME MyFixture + +class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State&) BENCHMARK_OVERRIDE { + fixture_interaction::fixture_setup++; + } + + ~FIXTURE_BECHMARK_NAME() {} +}; + +BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) { + for (auto _ : st) { + } +} + +static void DoSetupWithFixture(const benchmark::State&) { + fixture_interaction::setup++; +} + +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, BM_WithFixture) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithFixture) + ->Repetitions(1) + ->Iterations(100); + +// Testing repetitions. +namespace repetitions { +int setup = 0; +} + +static void DoSetupWithRepetitions(const benchmark::State&) { + repetitions::setup++; +} +static void BM_WithRep(benchmark::State& state) { + for (auto _ : state) { + } +} + +BENCHMARK(BM_WithRep) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithRepetitions) + ->Iterations(100) + ->Repetitions(4); + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + + size_t ret = benchmark::RunSpecifiedBenchmarks("."); + assert(ret > 0); + + // Setup/Teardown is called once for each arg group (1,3,5,7). + assert(single::setup_call == 4); + assert(single::teardown_call == 4); + + // 3 group of threads calling this function (3,5,10). + assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); + assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3); + assert((5 + 10 + 15) == + concurrent::func_call.load(std::memory_order_relaxed)); + + // Setup is called 4 times, once for each arg group (1,3,5,7) + assert(fixture_interaction::setup == 4); + // Fixture::Setup is called everytime the bm routine is run. + // The exact number is indeterministic, so we just assert that + // it's more than setup. + assert(fixture_interaction::fixture_setup > fixture_interaction::setup); + + // Setup is call once for each repetition * num_arg = 4 * 4 = 16. + assert(repetitions::setup == 16); + + return 0; +} diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 3cd4f55..2906cdc 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -93,8 +93,9 @@ static void BM_SetInsert(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); } -// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, -// non-timed part of each iteration will make the benchmark take forever. +// Test many inserts at once to reduce the total iterations needed. Otherwise, +// the slower, non-timed part of each iteration will make the benchmark take +// forever. BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); template Range(1, 1 << 20); static void BM_SetupTeardown(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // No need to lock test_vector_mu here as this is running single-threaded. test_vector = new std::vector(); } @@ -139,7 +140,7 @@ static void BM_SetupTeardown(benchmark::State& state) { test_vector->pop_back(); ++i; } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -156,11 +157,11 @@ BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); static void BM_ParallelMemset(benchmark::State& state) { int64_t size = state.range(0) / static_cast(sizeof(int)); - int thread_size = static_cast(size) / state.threads; - int from = thread_size * state.thread_index; + int thread_size = static_cast(size) / state.threads(); + int from = thread_size * state.thread_index(); int to = from + thread_size; - if (state.thread_index == 0) { + if (state.thread_index() == 0) { test_vector = new std::vector(static_cast(size)); } @@ -172,7 +173,7 @@ static void BM_ParallelMemset(benchmark::State& state) { } } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -214,7 +215,8 @@ BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), std::pair(42, 3.8)); void BM_non_template_args(benchmark::State& state, int, double) { - while(state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); @@ -223,14 +225,14 @@ BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); static void BM_DenseThreadRanges(benchmark::State& st) { switch (st.range(0)) { case 1: - assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); break; case 2: - assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); break; case 3: - assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || - st.threads == 14); + assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || + st.threads() == 14); break; default: assert(false && "Invalid test case number"); diff --git a/test/clobber_memory_assembly_test.cc b/test/clobber_memory_assembly_test.cc index f41911a..ab26913 100644 --- a/test/clobber_memory_assembly_test.cc +++ b/test/clobber_memory_assembly_test.cc @@ -9,7 +9,6 @@ extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; - } // CHECK-LABEL: test_basic: diff --git a/test/commandlineflags_gtest.cc b/test/commandlineflags_gtest.cc index 656020f..8412008 100644 --- a/test/commandlineflags_gtest.cc +++ b/test/commandlineflags_gtest.cc @@ -2,6 +2,7 @@ #include "../src/commandlineflags.h" #include "../src/internal_macros.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { @@ -19,9 +20,7 @@ int setenv(const char* name, const char* value, int overwrite) { return _putenv_s(name, value); } -int unsetenv(const char* name) { - return _putenv_s(name, ""); -} +int unsetenv(const char* name) { return _putenv_s(name, ""); } #endif // BENCHMARK_OS_WINDOWS @@ -197,5 +196,33 @@ TEST(StringFromEnv, Valid) { unsetenv("IN_ENV"); } +TEST(KvPairsFromEnv, Default) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); +} + +TEST(KvPairsFromEnv, MalformedReturnsDefault) { + ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", "bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Single) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Multiple) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + unsetenv("IN_ENV"); +} + } // namespace } // namespace benchmark diff --git a/test/complexity_test.cc b/test/complexity_test.cc index 5681fdc..1251cd4 100644 --- a/test/complexity_test.cc +++ b/test/complexity_test.cc @@ -4,6 +4,7 @@ #include #include #include + #include "benchmark/benchmark.h" #include "output_test.h" @@ -12,8 +13,10 @@ namespace { #define ADD_COMPLEXITY_CASES(...) \ int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) -int AddComplexityTest(std::string test_name, std::string big_o_test_name, - std::string rms_test_name, std::string big_o) { +int AddComplexityTest(const std::string &test_name, + const std::string &big_o_test_name, + const std::string &rms_test_name, + const std::string &big_o, int family_index) { SetSubstitutions({{"%name", test_name}, {"%bigo_name", big_o_test_name}, {"%rms_name", rms_test_name}, @@ -25,25 +28,33 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name. {"^%rms_name %rms %rms[ ]*$", MR_Next}}); - AddCases(TC_JSONOut, {{"\"name\": \"%bigo_name\",$"}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"BigO\",$", MR_Next}, - {"\"cpu_coefficient\": %float,$", MR_Next}, - {"\"real_coefficient\": %float,$", MR_Next}, - {"\"big_o\": \"%bigo\",$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}, - {"\"name\": \"%rms_name\",$"}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"RMS\",$", MR_Next}, - {"\"rms\": %float$", MR_Next}, - {"}", MR_Next}}); + AddCases( + TC_JSONOut, + {{"\"name\": \"%bigo_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"BigO\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"cpu_coefficient\": %float,$", MR_Next}, + {"\"real_coefficient\": %float,$", MR_Next}, + {"\"big_o\": \"%bigo\",$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}, + {"\"name\": \"%rms_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"RMS\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"rms\": %float$", MR_Next}, + {"}", MR_Next}}); AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, {"^\"%bigo_name\"", MR_Not}, {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); @@ -56,7 +67,7 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, // --------------------------- Testing BigO O(1) --------------------------- // // ========================================================================= // -void BM_Complexity_O1(benchmark::State& state) { +void BM_Complexity_O1(benchmark::State &state) { for (auto _ : state) { for (int i = 0; i < 1024; ++i) { benchmark::DoNotOptimize(&i); @@ -82,15 +93,15 @@ const char *lambda_big_o_1 = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - enum_big_o_1); + enum_big_o_1, /*family_index=*/0); // Add auto enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - auto_big_o_1); + auto_big_o_1, /*family_index=*/1); // Add lambda tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - lambda_big_o_1); + lambda_big_o_1, /*family_index=*/2); // ========================================================================= // // --------------------------- Testing BigO O(N) --------------------------- // @@ -105,7 +116,7 @@ std::vector ConstructRandomVector(int64_t size) { return v; } -void BM_Complexity_O_N(benchmark::State& state) { +void BM_Complexity_O_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); // Test worst case scenario (item not in vector) const int64_t item_not_in_vector = state.range(0) * 2; @@ -137,17 +148,17 @@ const char *lambda_big_o_n = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - enum_auto_big_o_n); + enum_auto_big_o_n, /*family_index=*/3); // Add lambda tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - lambda_big_o_n); + lambda_big_o_n, /*family_index=*/4); // ========================================================================= // // ------------------------- Testing BigO O(N*lgN) ------------------------- // // ========================================================================= // -static void BM_Complexity_O_N_log_N(benchmark::State& state) { +static void BM_Complexity_O_N_log_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); for (auto _ : state) { std::sort(v.begin(), v.end()); @@ -178,17 +189,19 @@ const char *lambda_big_o_n_lg_n = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n); + rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n, + /*family_index=*/6); // Add lambda tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n); + rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n, + /*family_index=*/7); // ========================================================================= // // -------- Testing formatting of Complexity with captured args ------------ // // ========================================================================= // -void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { +void BM_ComplexityCaptureArgs(benchmark::State &state, int n) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); @@ -204,7 +217,7 @@ const std::string complexity_capture_name = "BM_ComplexityCaptureArgs/capture_test"; ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", - complexity_capture_name + "_RMS", "N"); + complexity_capture_name + "_RMS", "N", /*family_index=*/9); // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // diff --git a/test/cxx03_test.cc b/test/cxx03_test.cc index c4c9a52..9711c1b 100644 --- a/test/cxx03_test.cc +++ b/test/cxx03_test.cc @@ -44,8 +44,7 @@ BENCHMARK_TEMPLATE(BM_template1, long); BENCHMARK_TEMPLATE1(BM_template1, int); template -struct BM_Fixture : public ::benchmark::Fixture { -}; +struct BM_Fixture : public ::benchmark::Fixture {}; BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) { BM_empty(state); @@ -55,8 +54,8 @@ BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) { } void BM_counters(benchmark::State& state) { - BM_empty(state); - state.counters["Foo"] = 2; + BM_empty(state); + state.counters["Foo"] = 2; } BENCHMARK(BM_counters); diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc index dd64a33..c54d5b0 100644 --- a/test/diagnostics_test.cc +++ b/test/diagnostics_test.cc @@ -26,7 +26,8 @@ void TestHandler() { } void try_invalid_pause_resume(benchmark::State& state) { -#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS) +#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && \ + !defined(TEST_HAS_NO_EXCEPTIONS) try { state.PauseTiming(); std::abort(); @@ -57,13 +58,12 @@ void BM_diagnostic_test(benchmark::State& state) { } BENCHMARK(BM_diagnostic_test); - void BM_diagnostic_test_keep_running(benchmark::State& state) { static bool called_once = false; if (called_once == false) try_invalid_pause_resume(state); - while(state.KeepRunning()) { + while (state.KeepRunning()) { benchmark::DoNotOptimize(state.iterations()); } diff --git a/test/display_aggregates_only_test.cc b/test/display_aggregates_only_test.cc index 3c36d3f..6ad65e7 100644 --- a/test/display_aggregates_only_test.cc +++ b/test/display_aggregates_only_test.cc @@ -19,21 +19,23 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find 6 " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find 8 " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/donotoptimize_assembly_test.cc b/test/donotoptimize_assembly_test.cc index d4b0bab..2e86a51 100644 --- a/test/donotoptimize_assembly_test.cc +++ b/test/donotoptimize_assembly_test.cc @@ -15,7 +15,7 @@ inline int Add42(int x) { return x + 42; } struct NotTriviallyCopyable { NotTriviallyCopyable(); explicit NotTriviallyCopyable(int x) : value(x) {} - NotTriviallyCopyable(NotTriviallyCopyable const&); + NotTriviallyCopyable(NotTriviallyCopyable const &); int value; }; @@ -23,7 +23,6 @@ struct Large { int value; int data[2]; }; - } // CHECK-LABEL: test_with_rvalue: extern "C" void test_with_rvalue() { @@ -118,8 +117,7 @@ extern "C" int test_div_by_two(int input) { // CHECK-LABEL: test_inc_integer: extern "C" int test_inc_integer() { int x = 0; - for (int i=0; i < 5; ++i) - benchmark::DoNotOptimize(++x); + for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x); // CHECK: movl $1, [[DEST:.*]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] @@ -147,7 +145,7 @@ extern "C" void test_pointer_const_lvalue() { // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret int x = 42; - int * const xp = &x; + int *const xp = &x; benchmark::DoNotOptimize(xp); } diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc index 2ce92d1..c321f15 100644 --- a/test/donotoptimize_test.cc +++ b/test/donotoptimize_test.cc @@ -1,27 +1,28 @@ -#include "benchmark/benchmark.h" - #include +#include "benchmark/benchmark.h" + namespace { #if defined(__GNUC__) std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); #endif std::uint64_t double_up(const std::uint64_t x) { return x * 2; } -} +} // namespace // Using DoNotOptimize on types like BitRef seem to cause a lot of problems // with the inline assembly on both GCC and Clang. struct BitRef { int index; - unsigned char &byte; + unsigned char& byte; -public: + public: static BitRef Make() { static unsigned char arr[2] = {}; BitRef b(1, arr[0]); return b; } -private: + + private: BitRef(int i, unsigned char& b) : index(i), byte(b) {} }; diff --git a/test/filter_test.cc b/test/filter_test.cc index 0e27065..a567de2 100644 --- a/test/filter_test.cc +++ b/test/filter_test.cc @@ -1,36 +1,41 @@ -#include "benchmark/benchmark.h" - +#include #include #include #include #include - #include #include #include #include +#include "benchmark/benchmark.h" + namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) { + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector& report) { + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { ++count_; + max_family_index_ = + std::max(max_family_index_, report[0].family_index); ConsoleReporter::ReportRuns(report); }; - TestReporter() : count_(0) {} + TestReporter() : count_(0), max_family_index_(0) {} virtual ~TestReporter() {} size_t GetCount() const { return count_; } + size_t GetMaxFamilyIndex() const { return max_family_index_; } + private: mutable size_t count_; + mutable size_t max_family_index_; }; } // end namespace @@ -65,7 +70,7 @@ static void BM_FooBa(benchmark::State& state) { } BENCHMARK(BM_FooBa); -int main(int argc, char **argv) { +int main(int argc, char** argv) { bool list_only = false; for (int i = 0; i < argc; ++i) list_only |= std::string(argv[i]).find("--benchmark_list_tests") != @@ -98,6 +103,15 @@ int main(int argc, char **argv) { << std::endl; return -1; } + + const size_t max_family_index = test_reporter.GetMaxFamilyIndex(); + const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index; + if (num_families != expected_reports) { + std::cerr << "ERROR: Expected " << expected_reports + << " test families to be run but num_families = " + << num_families << std::endl; + return -1; + } } return 0; diff --git a/test/fixture_test.cc b/test/fixture_test.cc index 1462b10..af650db 100644 --- a/test/fixture_test.cc +++ b/test/fixture_test.cc @@ -1,39 +1,41 @@ -#include "benchmark/benchmark.h" - #include #include -class MyFixture : public ::benchmark::Fixture { +#include "benchmark/benchmark.h" + +#define FIXTURE_BECHMARK_NAME MyFixture + +class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& state) { - if (state.thread_index == 0) { + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + if (state.thread_index() == 0) { assert(data.get() == nullptr); data.reset(new int(42)); } } - void TearDown(const ::benchmark::State& state) { - if (state.thread_index == 0) { + void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + if (state.thread_index() == 0) { assert(data.get() != nullptr); data.reset(); } } - ~MyFixture() { assert(data == nullptr); } + ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); } std::unique_ptr data; }; -BENCHMARK_F(MyFixture, Foo)(benchmark::State &st) { +BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State& st) { assert(data.get() != nullptr); assert(*data == 42); for (auto _ : st) { } } -BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) { - if (st.thread_index == 0) { +BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { + if (st.thread_index() == 0) { assert(data.get() != nullptr); assert(*data == 42); } @@ -43,7 +45,7 @@ BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) { } st.SetItemsProcessed(st.range(0)); } -BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42); -BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu(); +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42); +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42)->ThreadPerCpu(); BENCHMARK_MAIN(); diff --git a/test/internal_threading_test.cc b/test/internal_threading_test.cc index 039d7c1..62b5b95 100644 --- a/test/internal_threading_test.cc +++ b/test/internal_threading_test.cc @@ -3,6 +3,7 @@ #include #include + #include "../src/timers.h" #include "benchmark/benchmark.h" #include "output_test.h" diff --git a/test/map_test.cc b/test/map_test.cc index dbf7982..5096134 100644 --- a/test/map_test.cc +++ b/test/map_test.cc @@ -1,8 +1,8 @@ -#include "benchmark/benchmark.h" - #include #include +#include "benchmark/benchmark.h" + namespace { std::map ConstructRandomMap(int size) { @@ -34,11 +34,11 @@ BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); // Using fixtures. class MapFixture : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& st) { + void SetUp(const ::benchmark::State& st) BENCHMARK_OVERRIDE { m = ConstructRandomMap(static_cast(st.range(0))); } - void TearDown(const ::benchmark::State&) { m.clear(); } + void TearDown(const ::benchmark::State&) BENCHMARK_OVERRIDE { m.clear(); } std::map m; }; diff --git a/test/memory_manager_test.cc b/test/memory_manager_test.cc index 90bed16..f0c192f 100644 --- a/test/memory_manager_test.cc +++ b/test/memory_manager_test.cc @@ -5,8 +5,8 @@ #include "output_test.h" class TestMemoryManager : public benchmark::MemoryManager { - void Start() {} - void Stop(Result* result) { + void Start() BENCHMARK_OVERRIDE {} + void Stop(Result* result) BENCHMARK_OVERRIDE { result->num_allocs = 42; result->max_bytes_used = 42000; } @@ -21,9 +21,11 @@ BENCHMARK(BM_empty); ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_empty\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/multiple_ranges_test.cc b/test/multiple_ranges_test.cc index b25f40e..7618c4d 100644 --- a/test/multiple_ranges_test.cc +++ b/test/multiple_ranges_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include #include #include #include +#include "benchmark/benchmark.h" + class MultipleRangesFixture : public ::benchmark::Fixture { public: MultipleRangesFixture() @@ -28,7 +28,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { {2, 7, 15}, {7, 6, 3}}) {} - void SetUp(const ::benchmark::State& state) { + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { std::vector ranges = {state.range(0), state.range(1), state.range(2)}; @@ -42,7 +42,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { virtual ~MultipleRangesFixture() { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -50,7 +50,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/test/options_test.cc b/test/options_test.cc index 7bfc235..d424d40 100644 --- a/test/options_test.cc +++ b/test/options_test.cc @@ -1,7 +1,8 @@ -#include "benchmark/benchmark.h" #include #include +#include "benchmark/benchmark.h" + #if defined(NDEBUG) #undef NDEBUG #endif @@ -25,6 +26,7 @@ BENCHMARK(BM_basic)->Arg(42); BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond); BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond); BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kSecond); BENCHMARK(BM_basic)->Range(1, 8); BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8); BENCHMARK(BM_basic)->DenseRange(10, 15); @@ -64,11 +66,9 @@ void BM_explicit_iteration_count(benchmark::State& state) { // Test that the requested iteration count is respected. assert(state.max_iterations == 42); size_t actual_iterations = 0; - for (auto _ : state) - ++actual_iterations; + for (auto _ : state) ++actual_iterations; assert(state.iterations() == state.max_iterations); assert(state.iterations() == 42); - } BENCHMARK(BM_explicit_iteration_count)->Iterations(42); diff --git a/test/output_test.h b/test/output_test.h index 9385761..c6ff8ef 100644 --- a/test/output_test.h +++ b/test/output_test.h @@ -85,7 +85,7 @@ std::string GetFileReporterOutput(int argc, char* argv[]); struct Results; typedef std::function ResultsCheckFn; -size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn); +size_t AddChecker(const char* bm_name_pattern, const ResultsCheckFn& fn); // Class holding the results of a benchmark. // It is passed in calls to checker functions. @@ -113,9 +113,7 @@ struct Results { return NumIterations() * GetTime(kRealTime); } // get the cpu_time duration of the benchmark in seconds - double DurationCPUTime() const { - return NumIterations() * GetTime(kCpuTime); - } + double DurationCPUTime() const { return NumIterations() * GetTime(kCpuTime); } // get the string for a result by name, or nullptr if the name // is not found @@ -143,12 +141,12 @@ struct Results { template T Results::GetAs(const char* entry_name) const { auto* sv = Get(entry_name); - CHECK(sv != nullptr && !sv->empty()); + BM_CHECK(sv != nullptr && !sv->empty()); std::stringstream ss; ss << *sv; T out; ss >> out; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return out; } @@ -158,8 +156,8 @@ T Results::GetAs(const char* entry_name) const { // clang-format off -#define _CHECK_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value) \ - CONCAT(CHECK_, relationship) \ +#define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ + CONCAT(BM_CHECK_, relationship) \ (entry.getfn< var_type >(var_name), (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ @@ -169,8 +167,8 @@ T Results::GetAs(const char* entry_name) const { // check with tolerance. eps_factor is the tolerance window, which is // interpreted relative to value (eg, 0.1 means 10% of value). -#define _CHECK_FLOAT_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ - CONCAT(CHECK_FLOAT_, relationship) \ +#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ + CONCAT(BM_CHECK_FLOAT_, relationship) \ (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ @@ -187,16 +185,16 @@ T Results::GetAs(const char* entry_name) const { << "%)" #define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \ - _CHECK_RESULT_VALUE(entry, GetAs, var_type, var_name, relationship, value) + CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value) #define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \ - _CHECK_RESULT_VALUE(entry, GetCounterAs, var_type, var_name, relationship, value) + CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value) #define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \ - _CHECK_FLOAT_RESULT_VALUE(entry, GetAs, double, var_name, relationship, value, eps_factor) + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor) #define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \ - _CHECK_FLOAT_RESULT_VALUE(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) // clang-format on diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc index f99b3a8..81584cb 100644 --- a/test/output_test_helper.cc +++ b/test/output_test_helper.cc @@ -10,6 +10,7 @@ #include "../src/benchmark_api_internal.h" #include "../src/check.h" // NOTE: check.h is for internal use only! +#include "../src/log.h" // NOTE: log.h is for internal use only #include "../src/re.h" // NOTE: re.h is for internal use only #include "output_test.h" @@ -40,14 +41,20 @@ SubMap& GetSubstitutions() { // clang-format off static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; static std::string time_re = "([0-9]+[.])?[0-9]+"; + static std::string percentage_re = "[0-9]+[.][0-9]{2}"; static SubMap map = { {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, // human-readable float {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, + {"%percentage", percentage_re}, {"%int", "[ ]*[0-9]+"}, {" %s ", "[ ]+"}, {"%time", "[ ]*" + time_re + "[ ]+ns"}, {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, + {"%console_percentage_report", "[ ]*" + percentage_re + "[ ]+% [ ]*" + percentage_re + "[ ]+% [ ]*[0-9]+"}, + {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, + {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, + {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, {"%console_time_only_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_us_time_only_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us"}, @@ -56,6 +63,8 @@ SubMap& GetSubstitutions() { "items_per_second,label,error_occurred,error_message"}, {"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"}, {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"}, + {"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"}, + {"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"}, {"%csv_bytes_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"}, {"%csv_items_report", @@ -89,27 +98,27 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC, bool on_first = true; std::string line; while (remaining_output.eof() == false) { - CHECK(remaining_output.good()); + BM_CHECK(remaining_output.good()); std::getline(remaining_output, line); if (on_first) { first_line = line; on_first = false; } for (const auto& NC : not_checks) { - CHECK(!NC.regex->Match(line)) + BM_CHECK(!NC.regex->Match(line)) << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" << NC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } if (TC.regex->Match(line)) return; - CHECK(TC.match_rule != MR_Next) + BM_CHECK(TC.match_rule != MR_Next) << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } - CHECK(remaining_output.eof() == false) + BM_CHECK(remaining_output.eof() == false) << "End of output reached before match for regex \"" << TC.regex_str << "\" was found" << "\n actual regex string \"" << TC.substituted_regex << "\"" @@ -132,14 +141,14 @@ void CheckCases(TestCaseList const& checks, std::stringstream& output) { class TestReporter : public benchmark::BenchmarkReporter { public: TestReporter(std::vector reps) - : reporters_(reps) {} + : reporters_(std::move(reps)) {} - virtual bool ReportContext(const Context& context) { + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { bool last_ret = false; bool first = true; for (auto rep : reporters_) { bool new_ret = rep->ReportContext(context); - CHECK(first || new_ret == last_ret) + BM_CHECK(first || new_ret == last_ret) << "Reports return different values for ReportContext"; first = false; last_ret = new_ret; @@ -148,10 +157,10 @@ class TestReporter : public benchmark::BenchmarkReporter { return last_ret; } - void ReportRuns(const std::vector& report) { + void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { for (auto rep : reporters_) rep->ReportRuns(report); } - void Finalize() { + void Finalize() BENCHMARK_OVERRIDE { for (auto rep : reporters_) rep->Finalize(); } @@ -174,7 +183,7 @@ class ResultsChecker { public: struct PatternAndFn : public TestCase { // reusing TestCase for its regexes PatternAndFn(const std::string& rx, ResultsCheckFn fn_) - : TestCase(rx), fn(fn_) {} + : TestCase(rx), fn(std::move(fn_)) {} ResultsCheckFn fn; }; @@ -182,7 +191,7 @@ class ResultsChecker { std::vector results; std::vector field_names; - void Add(const std::string& entry_pattern, ResultsCheckFn fn); + void Add(const std::string& entry_pattern, const ResultsCheckFn& fn); void CheckResults(std::stringstream& output); @@ -201,7 +210,8 @@ ResultsChecker& GetResultsChecker() { } // add a results checker for a benchmark -void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) { +void ResultsChecker::Add(const std::string& entry_pattern, + const ResultsCheckFn& fn) { check_patterns.emplace_back(entry_pattern, fn); } @@ -221,7 +231,7 @@ void ResultsChecker::CheckResults(std::stringstream& output) { std::string line; bool on_first = true; while (output.eof() == false) { - CHECK(output.good()); + BM_CHECK(output.good()); std::getline(output, line); if (on_first) { SetHeader_(line); // this is important @@ -232,18 +242,18 @@ void ResultsChecker::CheckResults(std::stringstream& output) { } // finally we can call the subscribed check functions for (const auto& p : check_patterns) { - VLOG(2) << "--------------------------------\n"; - VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; + BM_VLOG(2) << "--------------------------------\n"; + BM_VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; for (const auto& r : results) { if (!p.regex->Match(r.name)) { - VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; + BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; continue; } else { - VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; + BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; } - VLOG(1) << "Checking results of " << r.name << ": ... \n"; + BM_VLOG(1) << "Checking results of " << r.name << ": ... \n"; p.fn(r); - VLOG(1) << "Checking results of " << r.name << ": OK.\n"; + BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n"; } } } @@ -256,9 +266,9 @@ void ResultsChecker::SetHeader_(const std::string& csv_header) { // set the values for a benchmark void ResultsChecker::SetValues_(const std::string& entry_csv_line) { if (entry_csv_line.empty()) return; // some lines are empty - CHECK(!field_names.empty()); + BM_CHECK(!field_names.empty()); auto vals = SplitCsv_(entry_csv_line); - CHECK_EQ(vals.size(), field_names.size()); + BM_CHECK_EQ(vals.size(), field_names.size()); results.emplace_back(vals[0]); // vals[0] is the benchmark name auto& entry = results.back(); for (size_t i = 1, e = vals.size(); i < e; ++i) { @@ -273,7 +283,7 @@ std::vector ResultsChecker::SplitCsv_(const std::string& line) { if (!field_names.empty()) out.reserve(field_names.size()); size_t prev = 0, pos = line.find_first_of(','), curr = pos; while (pos != line.npos) { - CHECK(curr > 0); + BM_CHECK(curr > 0); if (line[prev] == '"') ++prev; if (line[curr - 1] == '"') --curr; out.push_back(line.substr(prev, curr - prev)); @@ -290,7 +300,7 @@ std::vector ResultsChecker::SplitCsv_(const std::string& line) { } // end namespace internal -size_t AddChecker(const char* bm_name, ResultsCheckFn fn) { +size_t AddChecker(const char* bm_name, const ResultsCheckFn& fn) { auto& rc = internal::GetResultsChecker(); rc.Add(bm_name, fn); return rc.results.size(); @@ -304,20 +314,18 @@ int Results::NumThreads() const { ss << name.substr(pos + 9, end); int num = 1; ss >> num; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return num; } -double Results::NumIterations() const { - return GetAs("iterations"); -} +double Results::NumIterations() const { return GetAs("iterations"); } double Results::GetTime(BenchmarkTime which) const { - CHECK(which == kCpuTime || which == kRealTime); + BM_CHECK(which == kCpuTime || which == kRealTime); const char* which_str = which == kCpuTime ? "cpu_time" : "real_time"; double val = GetAs(which_str); auto unit = Get("time_unit"); - CHECK(unit); + BM_CHECK(unit); if (*unit == "ns") { return val * 1.e-9; } else if (*unit == "us") { @@ -327,7 +335,7 @@ double Results::GetTime(BenchmarkTime which) const { } else if (*unit == "s") { return val; } else { - CHECK(1 == 0) << "unknown time unit: " << *unit; + BM_CHECK(1 == 0) << "unknown time unit: " << *unit; return 0; } } @@ -343,10 +351,10 @@ TestCase::TestCase(std::string re, int rule) regex(std::make_shared()) { std::string err_str; regex->Init(substituted_regex, &err_str); - CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex - << "\"" - << "\n originally \"" << regex_str << "\"" - << "\n got error: " << err_str; + BM_CHECK(err_str.empty()) + << "Could not construct regex \"" << substituted_regex << "\"" + << "\n originally \"" << regex_str << "\"" + << "\n got error: " << err_str; } int AddCases(TestCaseID ID, std::initializer_list il) { @@ -375,10 +383,8 @@ int SetSubstitutions( // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING + void RunOutputTests(int argc, char* argv[]) { using internal::GetTestCaseList; benchmark::Initialize(&argc, argv); @@ -433,13 +439,11 @@ void RunOutputTests(int argc, char* argv[]) { // the checks to subscribees. auto& csv = TestCases[2]; // would use == but gcc spits a warning - CHECK(std::strcmp(csv.name, "CSVReporter") == 0); + BM_CHECK(std::strcmp(csv.name, "CSVReporter") == 0); internal::GetResultsChecker().CheckResults(csv.out_stream); } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING int SubstrCnt(const std::string& haystack, const std::string& pat) { if (pat.length() == 0) return 0; @@ -463,9 +467,8 @@ static char RandomHexChar() { static std::string GetRandomFileName() { std::string model = "test.%%%%%%"; - for (auto & ch : model) { - if (ch == '%') - ch = RandomHexChar(); + for (auto& ch : model) { + if (ch == '%') ch = RandomHexChar(); } return model; } @@ -482,8 +485,7 @@ static std::string GetTempFileName() { int retries = 3; while (--retries) { std::string name = GetRandomFileName(); - if (!FileExists(name)) - return name; + if (!FileExists(name)) return name; } std::cerr << "Failed to create unique temporary file name" << std::endl; std::abort(); diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc new file mode 100644 index 0000000..3eac624 --- /dev/null +++ b/test/perf_counters_gtest.cc @@ -0,0 +1,145 @@ +#include + +#include "../src/perf_counters.h" +#include "gtest/gtest.h" + +#ifndef GTEST_SKIP +struct MsgHandler { + void operator=(std::ostream&) {} +}; +#define GTEST_SKIP() return MsgHandler() = std::cout +#endif + +using benchmark::internal::PerfCounters; +using benchmark::internal::PerfCounterValues; + +namespace { +const char kGenericPerfEvent1[] = "CYCLES"; +const char kGenericPerfEvent2[] = "BRANCHES"; +const char kGenericPerfEvent3[] = "INSTRUCTIONS"; + +TEST(PerfCountersTest, Init) { + EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); +} + +TEST(PerfCountersTest, OneCounter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Performance counters not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid()); +} + +TEST(PerfCountersTest, NegativeTest) { + if (!PerfCounters::kSupported) { + EXPECT_FALSE(PerfCounters::Initialize()); + return; + } + EXPECT_TRUE(PerfCounters::Initialize()); + EXPECT_FALSE(PerfCounters::Create({}).IsValid()); + EXPECT_FALSE(PerfCounters::Create({""}).IsValid()); + EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid()); + { + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3}) + .IsValid()); + } + EXPECT_FALSE( + PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}) + .IsValid()); + EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name", + kGenericPerfEvent1}) + .IsValid()); + { + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3}) + .IsValid()); + } + EXPECT_FALSE( + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"}) + .IsValid()); +} + +TEST(PerfCountersTest, Read1Counter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = PerfCounters::Create({kGenericPerfEvent1}); + EXPECT_TRUE(counters.IsValid()); + PerfCounterValues values1(1); + EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + PerfCounterValues values2(1); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[0], values1[0]); +} + +TEST(PerfCountersTest, Read2Counters) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); + EXPECT_TRUE(counters.IsValid()); + PerfCounterValues values1(2); + EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + EXPECT_GT(values1[1], 0); + PerfCounterValues values2(2); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[1], 0); +} + +size_t do_work() { + size_t res = 0; + for (size_t i = 0; i < 100000000; ++i) res += i * i; + return res; +} + +void measure(size_t threadcount, PerfCounterValues* values1, + PerfCounterValues* values2) { + BM_CHECK_NE(values1, nullptr); + BM_CHECK_NE(values2, nullptr); + std::vector threads(threadcount); + auto work = [&]() { BM_CHECK(do_work() > 1000); }; + + // We need to first set up the counters, then start the threads, so the + // threads would inherit the counters. But later, we need to first destroy the + // thread pool (so all the work finishes), then measure the counters. So the + // scopes overlap, and we need to explicitly control the scope of the + // threadpool. + auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3}); + for (auto& t : threads) t = std::thread(work); + counters.Snapshot(values1); + for (auto& t : threads) t.join(); + counters.Snapshot(values2); +} + +TEST(PerfCountersTest, MultiThreaded) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported."; + } + EXPECT_TRUE(PerfCounters::Initialize()); + PerfCounterValues values1(2); + PerfCounterValues values2(2); + + measure(2, &values1, &values2); + std::vector D1{static_cast(values2[0] - values1[0]), + static_cast(values2[1] - values1[1])}; + + measure(4, &values1, &values2); + std::vector D2{static_cast(values2[0] - values1[0]), + static_cast(values2[1] - values1[1])}; + + // Some extra work will happen on the main thread - like joining the threads + // - so the ratio won't be quite 2.0, but very close. + EXPECT_GE(D2[0], 1.9 * D1[0]); + EXPECT_GE(D2[1], 1.9 * D1[1]); +} +} // namespace diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc new file mode 100644 index 0000000..3017a45 --- /dev/null +++ b/test/perf_counters_test.cc @@ -0,0 +1,27 @@ +#undef NDEBUG + +#include "../src/perf_counters.h" + +#include "benchmark/benchmark.h" +#include "output_test.h" + +static void BM_Simple(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_Simple); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); + +static void CheckSimple(Results const& e) { + CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); + CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0); +} +CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); + +int main(int argc, char* argv[]) { + if (!benchmark::internal::PerfCounters::kSupported) { + return 0; + } + RunOutputTests(argc, argv); +} diff --git a/test/register_benchmark_test.cc b/test/register_benchmark_test.cc index 3ac5b21..602405b 100644 --- a/test/register_benchmark_test.cc +++ b/test/register_benchmark_test.cc @@ -10,7 +10,7 @@ namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual void ReportRuns(const std::vector& report) { + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } @@ -30,13 +30,13 @@ struct TestCase { void CheckRun(Run const& run) const { // clang-format off - CHECK(name == run.benchmark_name()) << "expected " << name << " got " + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); if (label) { - CHECK(run.report_label == label) << "expected " << label << " got " + BM_CHECK(run.report_label == label) << "expected " << label << " got " << run.report_label; } else { - CHECK(run.report_label == ""); + BM_CHECK(run.report_label.empty()); } // clang-format on } @@ -45,7 +45,7 @@ struct TestCase { std::vector ExpectedResults; int AddCases(std::initializer_list const& v) { - for (auto N : v) { + for (const auto& N : v) { ExpectedResults.push_back(N); } return 0; diff --git a/test/repetitions_test.cc b/test/repetitions_test.cc new file mode 100644 index 0000000..569777d --- /dev/null +++ b/test/repetitions_test.cc @@ -0,0 +1,214 @@ + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +static void BM_ExplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2); + +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +static void BM_ImplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ImplicitRepetitions); + +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}}); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/test/report_aggregates_only_test.cc b/test/report_aggregates_only_test.cc index 9646b9b..47da503 100644 --- a/test/report_aggregates_only_test.cc +++ b/test/report_aggregates_only_test.cc @@ -19,17 +19,19 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find three " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find four " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc index bcce007..2b6e654 100644 --- a/test/reporter_output_test.cc +++ b/test/reporter_output_test.cc @@ -1,5 +1,6 @@ #undef NDEBUG +#include #include #include "benchmark/benchmark.h" @@ -71,9 +72,11 @@ BENCHMARK(BM_basic); ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_basic\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -99,9 +102,11 @@ BENCHMARK(BM_bytes_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report " "bytes_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_bytes_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -128,9 +133,11 @@ BENCHMARK(BM_items_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report " "items_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_items_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -154,9 +161,11 @@ BENCHMARK(BM_label); ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_label\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -168,6 +177,101 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, ADD_CASES(TC_CSVOut, {{"^\"BM_label\",%csv_label_report_begin\"some " "label\"%csv_label_report_end$"}}); +// ========================================================================= // +// ------------------------ Testing Time Label Output ---------------------- // +// ========================================================================= // + +void BM_time_label_nanosecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_nanosecond\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_nanosecond\",%csv_report$"}}); + +void BM_time_label_microsecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_microsecond\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"us\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_microsecond\",%csv_us_report$"}}); + +void BM_time_label_millisecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_millisecond\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ms\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_millisecond\",%csv_ms_report$"}}); + +void BM_time_label_second(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_second\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"s\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_second\",%csv_s_report$"}}); + // ========================================================================= // // ------------------------ Testing Error Output --------------------------- // // ========================================================================= // @@ -180,9 +284,11 @@ void BM_error(benchmark::State& state) { BENCHMARK(BM_error); ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, + {"\"family_index\": 8,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_error\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, @@ -202,9 +308,11 @@ void BM_no_arg_name(benchmark::State& state) { BENCHMARK(BM_no_arg_name)->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); @@ -220,9 +328,11 @@ void BM_arg_name(benchmark::State& state) { BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); @@ -240,13 +350,41 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); +// ========================================================================= // +// ------------------------ Testing Name Output ---------------------------- // +// ========================================================================= // + +void BM_name(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_name)->Name("BM_custom_name"); + +ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"}, + {"\"family_index\": 12,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_custom_name\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}}); + // ========================================================================= // // ------------------------ Testing Big Args Output ------------------------ // // ========================================================================= // @@ -294,37 +432,50 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"}, {"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\"", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_mean\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, {"^\"BM_Repeat/repeats:2\",%csv_report$"}, @@ -341,43 +492,58 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"}, {"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_median\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3\",%csv_report$"}, @@ -396,49 +562,66 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"}, {"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_mean\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, @@ -457,6 +640,8 @@ void BM_RepeatOnce(benchmark::State& state) { BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}, + {"\"family_index\": 18,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, @@ -479,25 +664,34 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, @@ -521,25 +715,34 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, @@ -567,27 +770,36 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}}); ADD_CASES(TC_CSVOut, @@ -635,6 +847,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ " ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -644,6 +858,8 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -653,6 +869,8 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -662,39 +880,51 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); ADD_CASES( @@ -709,10 +939,158 @@ ADD_CASES( "manual_time_stddev\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); +// ========================================================================= // +// ------------- Testing relative standard deviation statistics ------------ // +// ========================================================================= // + +const auto UserPercentStatistics = [](const std::vector&) { + return 1. / 100.; +}; +void BM_UserPercentStats(benchmark::State& state) { + for (auto _ : state) { + state.SetIterationTime(150 / 10e8); + } +} +// clang-format off +BENCHMARK(BM_UserPercentStats) + ->Repetitions(3) + ->Iterations(5) + ->UseManualTime() + ->Unit(benchmark::TimeUnit::kNanosecond) + ->ComputeStatistics("", UserPercentStatistics, benchmark::StatisticUnit::kPercentage); +// clang-format on + +// check that UserPercent-provided stats is calculated, and is after the +// default-ones empty string as name is intentional, it would sort before +// anything else +ADD_CASES(TC_ConsoleOut, + {{"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time_ " + "[ ]* 1.00 % [ ]* 1.00 %[ ]*3$"}}); +ADD_CASES( + TC_JSONOut, + {{"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.(0)*e-(0)*2,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_\",%csv_report$"}}); + // ========================================================================= // // ------------------------- Testing StrEscape JSON ------------------------ // // ========================================================================= // -#if 0 // enable when csv testing code correctly handles multi-line fields +#if 0 // enable when csv testing code correctly handles multi-line fields void BM_JSON_Format(benchmark::State& state) { state.SkipWithError("val\b\f\n\r\t\\\"with\"es,capes"); for (auto _ : state) { @@ -720,9 +1098,11 @@ void BM_JSON_Format(benchmark::State& state) { } BENCHMARK(BM_JSON_Format); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"}, + {"\"family_index\": 23,$", MR_Next}, +{"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_JSON_Format\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc index 97a2e3c..026d479 100644 --- a/test/skip_with_error_test.cc +++ b/test/skip_with_error_test.cc @@ -10,11 +10,11 @@ namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) { + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector& report) { + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } @@ -33,14 +33,14 @@ struct TestCase { typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name()) + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - CHECK(error_occurred == run.error_occurred); - CHECK(error_message == run.error_message); + BM_CHECK(error_occurred == run.error_occurred); + BM_CHECK(error_message == run.error_message); if (error_occurred) { - // CHECK(run.iterations == 0); + // BM_CHECK(run.iterations == 0); } else { - CHECK(run.iterations != 0); + BM_CHECK(run.iterations != 0); } } }; @@ -97,7 +97,7 @@ ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}}); void BM_error_during_running(benchmark::State& state) { int first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.SkipWithError("error message"); @@ -119,12 +119,13 @@ ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, void BM_error_during_running_ranged_for(benchmark::State& state) { assert(state.max_iterations > 3 && "test requires at least a few iterations"); - int first_iter = true; + bool first_iter = true; // NOTE: Users should not write the for loop explicitly. for (auto It = state.begin(), End = state.end(); It != End; ++It) { if (state.range(0) == 1) { assert(first_iter); first_iter = false; + (void)first_iter; state.SkipWithError("error message"); // Test the unfortunate but documented behavior that the ranged-for loop // doesn't automatically terminate when SkipWithError is set. @@ -142,7 +143,7 @@ void BM_error_after_running(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(state.iterations()); } - if (state.thread_index <= (state.threads / 2)) + if (state.thread_index() <= (state.threads() / 2)) state.SkipWithError("error message"); } BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); @@ -154,7 +155,7 @@ ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, void BM_error_while_paused(benchmark::State& state) { bool first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.PauseTiming(); diff --git a/test/spec_arg_test.cc b/test/spec_arg_test.cc new file mode 100644 index 0000000..043db1b --- /dev/null +++ b/test/spec_arg_test.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that we can override benchmark-spec value from FLAGS_benchmark_filter +// with argument to RunSpecifiedBenchmarks(...). + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + matched_functions.push_back(report[0].run_name.function_name); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector& GetMatchedFunctions() const { + return matched_functions; + } + + private: + std::vector matched_functions; +}; + +} // end namespace + +static void BM_NotChosen(benchmark::State& state) { + assert(false && "SHOULD NOT BE CALLED"); + for (auto _ : state) { + } +} +BENCHMARK(BM_NotChosen); + +static void BM_Chosen(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Chosen); + +int main(int argc, char** argv) { + const std::string flag = "BM_NotChosen"; + + // Verify that argv specify --benchmark_filter=BM_NotChosen. + bool found = false; + for (int i = 0; i < argc; ++i) { + if (strcmp("--benchmark_filter=BM_NotChosen", argv[i]) == 0) { + found = true; + break; + } + } + assert(found); + + benchmark::Initialize(&argc, argv); + + // Check that the current flag value is reported accurately via the + // GetBenchmarkFilter() function. + if (flag != benchmark::GetBenchmarkFilter()) { + std::cerr + << "Seeing different value for flags. GetBenchmarkFilter() returns [" + << benchmark::GetBenchmarkFilter() << "] expected flag=[" << flag + << "]\n"; + return 1; + } + TestReporter test_reporter; + const char* const spec = "BM_Chosen"; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, spec); + assert(returned_count == 1); + const std::vector matched_functions = + test_reporter.GetMatchedFunctions(); + assert(matched_functions.size() == 1); + if (strcmp(spec, matched_functions.front().c_str()) != 0) { + std::cerr << "Expected benchmark [" << spec << "] to run, but got [" + << matched_functions.front() << "]\n"; + return 2; + } + return 0; +} diff --git a/test/statistics_gtest.cc b/test/statistics_gtest.cc index 3ddc72d..1de2d87 100644 --- a/test/statistics_gtest.cc +++ b/test/statistics_gtest.cc @@ -25,4 +25,11 @@ TEST(StatisticsTest, StdDev) { 1.151086443322134); } +TEST(StatisticsTest, CV) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), + 0.32888184094918121); +} + } // end namespace diff --git a/test/string_util_gtest.cc b/test/string_util_gtest.cc index 01bf155..698f2d4 100644 --- a/test/string_util_gtest.cc +++ b/test/string_util_gtest.cc @@ -2,8 +2,8 @@ // statistics_test - Unit tests for src/statistics.cc //===---------------------------------------------------------------------===// -#include "../src/string_util.h" #include "../src/internal_macros.h" +#include "../src/string_util.h" #include "gtest/gtest.h" namespace { @@ -32,7 +32,8 @@ TEST(StringUtilTest, stoul) { #elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul { size_t pos = 0; - EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos)); + EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, + benchmark::stoul("18446744073709551615", &pos)); EXPECT_EQ(20ul, pos); } #endif @@ -62,92 +63,90 @@ TEST(StringUtilTest, stoul) { EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); - } + { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); } #endif } -TEST(StringUtilTest, stoi) { - { - size_t pos = 0; - EXPECT_EQ(0, benchmark::stoi("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); - EXPECT_EQ(4ul, pos); - } +TEST(StringUtilTest, stoi){{size_t pos = 0; +EXPECT_EQ(0, benchmark::stoi("0", &pos)); +EXPECT_EQ(1ul, pos); +} // namespace +{ + size_t pos = 0; + EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); + EXPECT_EQ(4ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); - } +{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); } #endif } -TEST(StringUtilTest, stod) { - { - size_t pos = 0; - EXPECT_EQ(0.0, benchmark::stod("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - /* Note: exactly representable as double */ - EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); - EXPECT_EQ(8ul, pos); - } +TEST(StringUtilTest, stod){{size_t pos = 0; +EXPECT_EQ(0.0, benchmark::stod("0", &pos)); +EXPECT_EQ(1ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + /* Note: exactly representable as double */ + EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); + EXPECT_EQ(8ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); - } +{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); } #endif } +TEST(StringUtilTest, StrSplit) { + EXPECT_EQ(benchmark::StrSplit("", ','), std::vector{}); + EXPECT_EQ(benchmark::StrSplit("hello", ','), + std::vector({"hello"})); + EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','), + std::vector({"hello", "there", "is", "more"})); +} + } // end namespace diff --git a/test/templated_fixture_test.cc b/test/templated_fixture_test.cc index fe9865c..af239c3 100644 --- a/test/templated_fixture_test.cc +++ b/test/templated_fixture_test.cc @@ -1,9 +1,9 @@ -#include "benchmark/benchmark.h" - #include #include +#include "benchmark/benchmark.h" + template class MyFixture : public ::benchmark::Fixture { public: diff --git a/test/user_counters_tabular_test.cc b/test/user_counters_tabular_test.cc index 18373c0..45ac043 100644 --- a/test/user_counters_tabular_test.cc +++ b/test/user_counters_tabular_test.cc @@ -7,19 +7,25 @@ // @todo: this checks the full output at once; the rule for // CounterSet1 was failing because it was not matching "^[-]+$". // @todo: check that the counters are vertically aligned. -ADD_CASES( - TC_ConsoleOut, - { - // keeping these lines long improves readability, so: - // clang-format off +ADD_CASES(TC_ConsoleOut, + { + // keeping these lines long improves readability, so: + // clang-format off {"^[-]+$", MR_Next}, {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next}, {"^[-]+$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, @@ -46,8 +52,8 @@ ADD_CASES( {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"}, - // clang-format on - }); + // clang-format on + }); ADD_CASES(TC_CSVOut, {{"%csv_header," "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}}); @@ -68,12 +74,15 @@ void BM_Counters_Tabular(benchmark::State& state) { {"Lob", {32, bm::Counter::kAvgThreads}}, }); } -BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 16); +BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2); ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/threads:%int\",$"}, - {"\"run_name\": \"BM_Counters_Tabular/threads:%int\",$", MR_Next}, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -87,8 +96,260 @@ ADD_CASES(TC_JSONOut, {"\"Frob\": %float,$", MR_Next}, {"\"Lob\": %float$", MR_Next}, {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/threads:%int\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); + +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckTabular(Results const& e) { @@ -99,7 +360,10 @@ void CheckTabular(Results const& e) { CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16); CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32); } -CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular); +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$", + &CheckTabular); +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$", + &CheckTabular); // ========================================================================= // // -------------------- Tabular+Rate Counters Output ----------------------- // @@ -123,10 +387,12 @@ void BM_CounterRates_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -174,9 +440,11 @@ void BM_CounterSet0_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -212,9 +480,11 @@ void BM_CounterSet1_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -254,9 +524,11 @@ void BM_CounterSet2_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/user_counters_test.cc b/test/user_counters_test.cc index 5699f4f..1cc7455 100644 --- a/test/user_counters_test.cc +++ b/test/user_counters_test.cc @@ -26,15 +26,17 @@ void BM_Counters_Simple(benchmark::State& state) { for (auto _ : state) { } state.counters["foo"] = 1; - state.counters["bar"] = 2 * (double)state.iterations(); + state.counters["bar"] = 2 * static_cast(state.iterations()); } BENCHMARK(BM_Counters_Simple); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Simple\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -78,9 +80,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report " "foo=%hrfloat items_per_second=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -125,9 +129,11 @@ ADD_CASES( TC_ConsoleOut, {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Rate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -165,9 +171,11 @@ BENCHMARK(BM_Invert); ADD_CASES(TC_ConsoleOut, {{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Invert\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -207,9 +215,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report " "bar=%hrfloats foo=%hrfloats$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_InvertedRate\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -246,9 +256,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -285,9 +297,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " "%console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -327,10 +341,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -367,9 +383,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_IterationInvariant\",$"}, + {"\"family_index\": 8,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -412,10 +430,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -455,9 +475,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgIterations\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -498,9 +520,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/user_counters_thousands_test.cc b/test/user_counters_thousands_test.cc index 21d8285..a42683b 100644 --- a/test/user_counters_thousands_test.cc +++ b/test/user_counters_thousands_test.cc @@ -51,6 +51,8 @@ ADD_CASES( }); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, @@ -68,6 +70,8 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, @@ -85,11 +89,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -102,11 +109,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -119,11 +129,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel new file mode 100644 index 0000000..5895883 --- /dev/null +++ b/tools/BUILD.bazel @@ -0,0 +1,19 @@ +load("@py_deps//:requirements.bzl", "requirement") + +py_library( + name = "gbench", + srcs = glob(["gbench/*.py"]), + deps = [ + requirement("numpy"), + requirement("scipy"), + ], +) + +py_binary( + name = "compare", + srcs = ["compare.py"], + python_version = "PY2", + deps = [ + ":gbench", + ], +) diff --git a/tools/compare.py b/tools/compare.py index bd01be5..01d2c89 100755 --- a/tools/compare.py +++ b/tools/compare.py @@ -7,6 +7,7 @@ compare.py - versatile benchmark output compare tool import argparse from argparse import ArgumentParser +import json import sys import gbench from gbench import util, report @@ -56,6 +57,12 @@ def create_parser(): help="Do not use colors in the terminal output" ) + parser.add_argument( + '-d', + '--dump_to_json', + dest='dump_to_json', + help="Additionally, dump benchmark comparison output to this file in JSON format.") + utest = parser.add_argument_group() utest.add_argument( '--no-utest', @@ -231,10 +238,10 @@ def main(): options_contender = ['--benchmark_filter=%s' % filter_contender] # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline) - json2 = json2_orig = gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender) + json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline)) + json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender)) # Now, filter the benchmarks so that the difference report can work if filter_baseline and filter_contender: @@ -244,14 +251,20 @@ def main(): json2 = gbench.report.filter_benchmark( json2_orig, filter_contender, replacement) - # Diff and output - output_lines = gbench.report.generate_difference_report( - json1, json2, args.display_aggregates_only, + diff_report = gbench.report.get_difference_report( + json1, json2, args.utest) + output_lines = gbench.report.print_difference_report( + diff_report, + args.display_aggregates_only, args.utest, args.utest_alpha, args.color) print(description) for ln in output_lines: print(ln) + # Optionally, diff and output to JSON + if args.dump_to_json is not None: + with open(args.dump_to_json, 'w') as f_json: + json.dump(diff_report, f_json) class TestParser(unittest.TestCase): def setUp(self): diff --git a/tools/gbench/Inputs/test4_run.json b/tools/gbench/Inputs/test4_run.json new file mode 100644 index 0000000..eaa005f --- /dev/null +++ b/tools/gbench/Inputs/test4_run.json @@ -0,0 +1,96 @@ +{ + "benchmarks": [ + { + "name": "99 family 0 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "98 family 0 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "97 family 0 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": "96 family 0 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "95 family 0 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "94 family 0 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + }, + + + + + { + "name": "93 family 1 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "92 family 1 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "91 family 1 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": "90 family 1 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "89 family 1 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "88 family 1 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + } + ] +} diff --git a/tools/gbench/Inputs/test4_run0.json b/tools/gbench/Inputs/test4_run0.json new file mode 100644 index 0000000..54cf127 --- /dev/null +++ b/tools/gbench/Inputs/test4_run0.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.01, + "cpu_time": 0.10, + "time_unit": "ns" + } + ] +} diff --git a/tools/gbench/Inputs/test4_run1.json b/tools/gbench/Inputs/test4_run1.json new file mode 100644 index 0000000..25d5605 --- /dev/null +++ b/tools/gbench/Inputs/test4_run1.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.005, + "cpu_time": 0.15, + "time_unit": "ns" + } + ] +} diff --git a/tools/gbench/report.py b/tools/gbench/report.py index 5bd3a8d..4c798ba 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -1,11 +1,15 @@ -import unittest """report.py - Utilities for reporting statistics about benchmark results """ + +import unittest import os import re import copy +import random -from scipy.stats import mannwhitneyu +from scipy.stats import mannwhitneyu, gmean +from numpy import array +from pandas import Timedelta class BenchmarkColor(object): @@ -148,12 +152,37 @@ def partition_benchmarks(json1, json2): return partitions +def get_timedelta_field_as_seconds(benchmark, field_name): + """ + Get value of field_name field of benchmark, which is time with time unit + time_unit, as time in seconds. + """ + time_unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's' + dt = Timedelta(benchmark[field_name], time_unit) + return dt / Timedelta(1, 's') + + +def calculate_geomean(json): + """ + Extract all real/cpu times from all the benchmarks as seconds, + and calculate their geomean. + """ + times = [] + for benchmark in json['benchmarks']: + if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': + continue + times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), + get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) + return gmean(times) if times else array([]) + + def extract_field(partition, field_name): # The count of elements may be different. We want *all* of them. lhs = [x[field_name] for x in partition[0]] rhs = [x[field_name] for x in partition[1]] return [lhs, rhs] + def calc_utest(timings_cpu, timings_time): min_rep_cnt = min(len(timings_time[0]), len(timings_time[1]), @@ -171,46 +200,127 @@ def calc_utest(timings_cpu, timings_time): return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue -def print_utest(partition, utest_alpha, first_col_width, use_color=True): + +def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): def get_utest_color(pval): return BC_FAIL if pval >= utest_alpha else BC_OKGREEN - timings_time = extract_field(partition, 'real_time') - timings_cpu = extract_field(partition, 'cpu_time') - have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) - # Check if we failed miserably with minimum required repetitions for utest - if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None: + if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: return [] dsc = "U Test, Repetitions: {} vs {}".format( - len(timings_cpu[0]), len(timings_cpu[1])) + utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) dsc_color = BC_OKGREEN # We still got some results to show but issue a warning about it. - if not have_optimal_repetitions: + if not utest['have_optimal_repetitions']: dsc_color = BC_WARNING dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( UTEST_OPTIMAL_REPETITIONS) special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - last_name = partition[0][0]['name'] return [color_format(use_color, special_str, BC_HEADER, - "{}{}".format(last_name, UTEST_COL_NAME), + "{}{}".format(bc_name, UTEST_COL_NAME), first_col_width, - get_utest_color(time_pvalue), time_pvalue, - get_utest_color(cpu_pvalue), cpu_pvalue, + get_utest_color( + utest['time_pvalue']), utest['time_pvalue'], + get_utest_color( + utest['cpu_pvalue']), utest['cpu_pvalue'], dsc_color, dsc, endc=BC_ENDC)] -def generate_difference_report( +def get_difference_report( json1, json2, - display_aggregates_only=False, + utest=False): + """ + Calculate and report the difference between each test of two benchmarks + runs specified as 'json1' and 'json2'. Output is another json containing + relevant details for each test run. + """ + assert utest is True or utest is False + + diff_report = [] + partitions = partition_benchmarks(json1, json2) + for partition in partitions: + benchmark_name = partition[0][0]['name'] + time_unit = partition[0][0]['time_unit'] + measurements = [] + utest_results = {} + # Careful, we may have different repetition count. + for i in range(min(len(partition[0]), len(partition[1]))): + bn = partition[0][i] + other_bench = partition[1][i] + measurements.append({ + 'real_time': bn['real_time'], + 'cpu_time': bn['cpu_time'], + 'real_time_other': other_bench['real_time'], + 'cpu_time_other': other_bench['cpu_time'], + 'time': calculate_change(bn['real_time'], other_bench['real_time']), + 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) + }) + + # After processing the whole partition, if requested, do the U test. + if utest: + timings_cpu = extract_field(partition, 'cpu_time') + timings_time = extract_field(partition, 'real_time') + have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( + timings_cpu, timings_time) + if cpu_pvalue and time_pvalue: + utest_results = { + 'have_optimal_repetitions': have_optimal_repetitions, + 'cpu_pvalue': cpu_pvalue, + 'time_pvalue': time_pvalue, + 'nr_of_repetitions': len(timings_cpu[0]), + 'nr_of_repetitions_other': len(timings_cpu[1]) + } + + # Store only if we had any measurements for given benchmark. + # E.g. partition_benchmarks will filter out the benchmarks having + # time units which are not compatible with other time units in the + # benchmark suite. + if measurements: + run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' + aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' + diff_report.append({ + 'name': benchmark_name, + 'measurements': measurements, + 'time_unit': time_unit, + 'run_type': run_type, + 'aggregate_name': aggregate_name, + 'utest': utest_results + }) + + lhs_gmean = calculate_geomean(json1) + rhs_gmean = calculate_geomean(json2) + if lhs_gmean.any() and rhs_gmean.any(): + diff_report.append({ + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{ + 'real_time': lhs_gmean[0], + 'cpu_time': lhs_gmean[1], + 'real_time_other': rhs_gmean[0], + 'cpu_time_other': rhs_gmean[1], + 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), + 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) + }], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + }) + + return diff_report + + +def print_difference_report( + json_diff_report, + include_aggregates_only=False, utest=False, utest_alpha=0.05, use_color=True): @@ -219,14 +329,16 @@ def generate_difference_report( runs specified as 'json1' and 'json2'. """ assert utest is True or utest is False - first_col_width = find_longest_name(json1['benchmarks']) - def find_test(name): - for b in json2['benchmarks']: - if b['name'] == name: - return b - return None + def get_color(res): + if res > 0.05: + return BC_FAIL + elif res > -0.07: + return BC_WHITE + else: + return BC_CYAN + first_col_width = find_longest_name(json_diff_report) first_col_width = max( first_col_width, len('Benchmark')) @@ -235,50 +347,33 @@ def generate_difference_report( 'Benchmark', 12 + first_col_width) output_strs = [first_line, '-' * len(first_line)] - partitions = partition_benchmarks(json1, json2) - for partition in partitions: - # Careful, we may have different repetition count. - for i in range(min(len(partition[0]), len(partition[1]))): - bn = partition[0][i] - other_bench = partition[1][i] + fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" + for benchmark in json_diff_report: + # *If* we were asked to only include aggregates, + # and if it is non-aggregate, then don't print it. + if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': + for measurement in benchmark['measurements']: + output_strs += [color_format(use_color, + fmt_str, + BC_HEADER, + benchmark['name'], + first_col_width, + get_color(measurement['time']), + measurement['time'], + get_color(measurement['cpu']), + measurement['cpu'], + measurement['real_time'], + measurement['real_time_other'], + measurement['cpu_time'], + measurement['cpu_time_other'], + endc=BC_ENDC)] - # *If* we were asked to only display aggregates, - # and if it is non-aggregate, then skip it. - if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench: - assert bn['run_type'] == other_bench['run_type'] - if bn['run_type'] != 'aggregate': - continue - - fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" - - def get_color(res): - if res > 0.05: - return BC_FAIL - elif res > -0.07: - return BC_WHITE - else: - return BC_CYAN - - tres = calculate_change(bn['real_time'], other_bench['real_time']) - cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) - output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - bn['name'], - first_col_width, - get_color(tres), - tres, - get_color(cpures), - cpures, - bn['real_time'], - other_bench['real_time'], - bn['cpu_time'], - other_bench['cpu_time'], - endc=BC_ENDC)] - - # After processing the whole partition, if requested, do the U test. - if utest: - output_strs += print_utest(partition, + # After processing the measurements, if requested and + # if applicable (e.g. u-test exists for given benchmark), + # print the U test. + if utest and benchmark['utest']: + output_strs += print_utest(benchmark['name'], + benchmark['utest'], utest_alpha=utest_alpha, first_col_width=first_col_width, use_color=use_color) @@ -319,21 +414,26 @@ class TestGetUniqueBenchmarkNames(unittest.TestCase): class TestReportDifference(unittest.TestCase): - def load_results(self): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test1_run1.json') - testOutput2 = os.path.join(testInputs, 'test1_run2.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test1_run1.json') + testOutput2 = os.path.join(testInputs, 'test1_run2.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 - def test_basic(self): + json1, json2 = load_results() + cls.json_diff_report = get_difference_report(json1, json2) + + def test_json_diff_report_pretty_printing(self): expect_lines = [ ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], @@ -350,10 +450,10 @@ class TestReportDifference(unittest.TestCase): '-0.1000', '100', '110', '100', '90'], ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ['OVERALL_GEOMEAN', '-0.8344', '-0.8026', '0', '0', '0', '0'] ] - json1, json2 = self.load_results() - output_lines_with_header = generate_difference_report( - json1, json2, use_color=False) + output_lines_with_header = print_difference_report( + self.json_diff_report, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) @@ -363,31 +463,128 @@ class TestReportDifference(unittest.TestCase): self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) + def test_json_diff_report_output(self): + expected_output = [ + { + 'name': 'BM_SameTimes', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_2xFaster', + 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_2xSlower', + 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_1PercentFaster', + 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_1PercentSlower', + 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentFaster', + 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentSlower', + 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_100xSlower', + 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_100xFaster', + 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentCPUToTime', + 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_ThirdFaster', + 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_NotBadTimeUnit', + 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}], + 'time_unit': 's', + 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 1.193776641714438e-06, 'cpu_time': 1.2144445585302297e-06, + 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, + 'time': -0.834399601997324, 'cpu': -0.8025889499549471}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', 'utest': {} + }, + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + class TestReportDifferenceBetweenFamilies(unittest.TestCase): - def load_result(self): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test2_run.json') - with open(testOutput, 'r') as f: - json = json.load(f) - return json + @classmethod + def setUpClass(cls): + def load_result(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test2_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json - def test_basic(self): + json = load_result() + json1 = filter_benchmark(json, "BM_Z.ro", ".") + json2 = filter_benchmark(json, "BM_O.e", ".") + cls.json_diff_report = get_difference_report(json1, json2) + + def test_json_diff_report_pretty_printing(self): expect_lines = [ ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], + ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] ] - json = self.load_result() - json1 = filter_benchmark(json, "BM_Z.ro", ".") - json2 = filter_benchmark(json, "BM_O.e", ".") - output_lines_with_header = generate_difference_report( - json1, json2, use_color=False) + output_lines_with_header = print_difference_report( + self.json_diff_report, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) @@ -397,31 +594,81 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'.', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'./4', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], + 'time_unit': 'ns', + 'utest': {}, + }, + { + 'name': u'Prefix/.', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'Prefix/./3', + 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, + 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, + 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + class TestReportDifferenceWithUTest(unittest.TestCase): - def load_results(self): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 - def test_utest(self): - expect_lines = [] + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): expect_lines = [ ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -438,7 +685,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -452,10 +699,10 @@ class TestReportDifferenceWithUTest(unittest.TestCase): 'repetitions', 'recommended.'], ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] - json1, json2 = self.load_results() - output_lines_with_header = generate_difference_report( - json1, json2, utest=True, utest_alpha=0.05, use_color=False) + output_lines_with_header = print_difference_report( + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) @@ -464,32 +711,12 @@ class TestReportDifferenceWithUTest(unittest.TestCase): parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) - -class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - unittest.TestCase): - def load_results(self): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 - - def test_utest(self): - expect_lines = [] + def test_json_diff_report_pretty_printing_aggregates_only(self): expect_lines = [ ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -506,7 +733,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -519,10 +746,168 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( '9+', 'repetitions', 'recommended.'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] - json1, json2 = self.load_results() - output_lines_with_header = generate_difference_report( - json1, json2, display_aggregates_only=True, + output_lines_with_header = print_difference_report( + self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} + ], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 + } + }, + { + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 + } + }, + { + 'name': u'medium', + 'measurements': [ + {'time': -0.375, + 'cpu': -0.3375, + 'real_time': 8, + 'real_time_other': 5, + 'cpu_time': 80, + 'cpu_time_other': 53} + ], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( + unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '1.0000', + '0.6667', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.2000', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) output_lines = output_lines_with_header[2:] print("\n") @@ -532,6 +917,224 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} + ], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 + } + }, + { + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 + } + }, + { + 'name': u'medium', + 'measurements': [ + {'real_time_other': 5, + 'cpu_time': 80, + 'time': -0.375, + 'real_time': 8, + 'cpu_time_other': 53, + 'cpu': -0.3375 + } + ], + 'utest': {}, + 'time_unit': u'ns', + 'aggregate_name': '' + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportDifferenceForPercentageAggregates( + unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test4_run0.json') + testOutput2 = os.path.join(testInputs, 'test4_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'whocares', + 'measurements': [ + {'time': -0.5, + 'cpu': 0.5, + 'real_time': 0.01, + 'real_time_other': 0.005, + 'cpu_time': 0.10, + 'cpu_time_other': 0.15} + ], + 'time_unit': 'ns', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportSorting(unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_result(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test4_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + cls.json = load_result() + + def test_json_diff_report_pretty_printing(self): + import util + + expected_names = [ + "99 family 0 instance 0 repetition 0", + "98 family 0 instance 0 repetition 1", + "97 family 0 instance 0 aggregate", + "96 family 0 instance 1 repetition 0", + "95 family 0 instance 1 repetition 1", + "94 family 0 instance 1 aggregate", + "93 family 1 instance 0 repetition 0", + "92 family 1 instance 0 repetition 1", + "91 family 1 instance 0 aggregate", + "90 family 1 instance 1 repetition 0", + "89 family 1 instance 1 repetition 1", + "88 family 1 instance 1 aggregate" + ] + + for n in range(len(self.json['benchmarks']) ** 2): + random.shuffle(self.json['benchmarks']) + sorted_benchmarks = util.sort_benchmark_results(self.json)[ + 'benchmarks'] + self.assertEqual(len(expected_names), len(sorted_benchmarks)) + for out, expected in zip(sorted_benchmarks, expected_names): + self.assertEqual(out['name'], expected) + + +def assert_utest(unittest_instance, lhs, rhs): + if lhs['utest']: + unittest_instance.assertAlmostEqual( + lhs['utest']['cpu_pvalue'], + rhs['utest']['cpu_pvalue']) + unittest_instance.assertAlmostEqual( + lhs['utest']['time_pvalue'], + rhs['utest']['time_pvalue']) + unittest_instance.assertEqual( + lhs['utest']['have_optimal_repetitions'], + rhs['utest']['have_optimal_repetitions']) + else: + # lhs is empty. assert if rhs is not. + unittest_instance.assertEqual(lhs['utest'], rhs['utest']) + + +def assert_measurements(unittest_instance, lhs, rhs): + for m1, m2 in zip(lhs['measurements'], rhs['measurements']): + unittest_instance.assertEqual(m1['real_time'], m2['real_time']) + unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) + # m1['time'] and m1['cpu'] hold values which are being calculated, + # and therefore we must use almost-equal pattern. + unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) + unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) + if __name__ == '__main__': unittest.main() diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 661c4ba..5d0012c 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -5,6 +5,7 @@ import os import tempfile import subprocess import sys +import functools # Input file type enumeration IT_Invalid = 0 @@ -119,6 +120,23 @@ def load_benchmark_results(fname): return json.load(f) +def sort_benchmark_results(result): + benchmarks = result['benchmarks'] + + # From inner key to the outer key! + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) + benchmarks = sorted( + benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) + + result['benchmarks'] = benchmarks + return result + + def run_benchmark(exe_name, benchmark_flags): """ Run a benchmark specified by 'exe_name' with the specified