Add number of threads to State.

Having access to the thread count from within a benchmark is useful when one wants to distribute a workload dynamically among the benchmarks running in parallel, e.g. when using ThreadRange() or ThreadPerCpu().
2024-11-23 07:20:12 +00:00 · 2016-02-15 14:19:43 +01:00 · 2016-02-15 14:19:43 +01:00 · 007efee751
commit 007efee751
parent bdb9f697d7
3 changed files with 32 additions and 3 deletions
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@ -221,7 +221,7 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
 // benchmark to use.
 class State {
 public:
-  State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i);
+  State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads);

  // Returns true iff the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
@ -358,7 +358,10 @@ private:
  size_t items_processed_;

 public:
+  // Index of the executing thread. Values from [0, threads).
  const int thread_index;
+  // Number of threads concurrently executing the benchmark.
+  const int threads;
  const size_t max_iterations;

 private:
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@ -599,7 +599,7 @@ namespace {
 void RunInThread(const benchmark::internal::Benchmark::Instance* b,
                 size_t iters, int thread_id,
                 ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
-  State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id);
+  State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
  b->benchmark->Run(st);
  CHECK(st.iterations() == st.max_iterations) <<
    "Benchmark returned before State::KeepRunning() returned false!";
@ -736,15 +736,17 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
 }  // namespace

 State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
-             int thread_i)
+             int thread_i, int n_threads)
    : started_(false), total_iterations_(0),
      has_range_x_(has_x), range_x_(x),
      has_range_y_(has_y), range_y_(y),
      bytes_processed_(0), items_processed_(0),
      thread_index(thread_i),
+      threads(n_threads),
      max_iterations(max_iters)
 {
    CHECK(max_iterations != 0) << "At least one iteration must be run";
+    CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
 }

 void State::PauseTiming() {
--- a/test/benchmark_test.cc
+++ b/test/benchmark_test.cc
@ -150,5 +150,29 @@ static void BM_LongTest(benchmark::State& state) {
 }
 BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);

+static void BM_ParallelMemset(benchmark::State& state) {
+  int size = state.range_x() / sizeof(int);
+  int thread_size = size / state.threads;
+  int from = thread_size * state.thread_index;
+  int to = from + thread_size;
+
+  if (state.thread_index == 0) {
+    test_vector = new std::vector<int>(size);
+  }
+
+  while (state.KeepRunning()) {
+    for (int i = from; i < to; i++) {
+      // No need to lock test_vector_mu as ranges
+      // do not overlap between threads.
+      benchmark::DoNotOptimize(test_vector->at(i) = 1);
+    }
+  }
+
+  if (state.thread_index == 0) {
+    delete test_vector;
+  }
+}
+BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
+
 BENCHMARK_MAIN()