mutex_benchmark.cc
Go to the documentation of this file.
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <cstdint>
16 #include <mutex> // NOLINT(build/c++11)
17 #include <vector>
18 
24 #include "benchmark/benchmark.h"
25 
26 namespace {
27 
28 void BM_Mutex(benchmark::State& state) {
29  static absl::Mutex* mu = new absl::Mutex;
30  for (auto _ : state) {
31  absl::MutexLock lock(mu);
32  }
33 }
34 BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
35 
36 static void DelayNs(int64_t ns, int* data) {
39  while (absl::base_internal::CycleClock::Now() < end) {
40  ++(*data);
41  benchmark::DoNotOptimize(*data);
42  }
43 }
44 
45 template <typename MutexType>
46 class RaiiLocker {
47  public:
48  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
49  ~RaiiLocker() { mu_->Unlock(); }
50  private:
51  MutexType* mu_;
52 };
53 
54 template <>
55 class RaiiLocker<std::mutex> {
56  public:
57  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
58  ~RaiiLocker() { mu_->unlock(); }
59  private:
60  std::mutex* mu_;
61 };
62 
63 template <typename MutexType>
64 void BM_Contended(benchmark::State& state) {
65  struct Shared {
66  MutexType mu;
67  int data = 0;
68  };
69  static auto* shared = new Shared;
70  int local = 0;
71  for (auto _ : state) {
72  // Here we model both local work outside of the critical section as well as
73  // some work inside of the critical section. The idea is to capture some
74  // more or less realisitic contention levels.
75  // If contention is too low, the benchmark won't measure anything useful.
76  // If contention is unrealistically high, the benchmark will favor
77  // bad mutex implementations that block and otherwise distract threads
78  // from the mutex and shared state for as much as possible.
79  // To achieve this amount of local work is multiplied by number of threads
80  // to keep ratio between local work and critical section approximately
81  // equal regardless of number of threads.
82  DelayNs(100 * state.threads, &local);
83  RaiiLocker<MutexType> locker(&shared->mu);
84  DelayNs(state.range(0), &shared->data);
85  }
86 }
87 
88 BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
89  ->UseRealTime()
90  // ThreadPerCpu poorly handles non-power-of-two CPU counts.
91  ->Threads(1)
92  ->Threads(2)
93  ->Threads(4)
94  ->Threads(6)
95  ->Threads(8)
96  ->Threads(12)
97  ->Threads(16)
98  ->Threads(24)
99  ->Threads(32)
100  ->Threads(48)
101  ->Threads(64)
102  ->Threads(96)
103  ->Threads(128)
104  ->Threads(192)
105  ->Threads(256)
106  // Some empirically chosen amounts of work in critical section.
107  // 1 is low contention, 200 is high contention and few values in between.
108  ->Arg(1)
109  ->Arg(20)
110  ->Arg(50)
111  ->Arg(200);
112 
113 BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
114  ->UseRealTime()
115  // ThreadPerCpu poorly handles non-power-of-two CPU counts.
116  ->Threads(1)
117  ->Threads(2)
118  ->Threads(4)
119  ->Threads(6)
120  ->Threads(8)
121  ->Threads(12)
122  ->Threads(16)
123  ->Threads(24)
124  ->Threads(32)
125  ->Threads(48)
126  ->Threads(64)
127  ->Threads(96)
128  ->Threads(128)
129  ->Threads(192)
130  ->Threads(256)
131  // Some empirically chosen amounts of work in critical section.
132  // 1 is low contention, 200 is high contention and few values in between.
133  ->Arg(1)
134  ->Arg(20)
135  ->Arg(50)
136  ->Arg(200);
137 
138 BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
139  ->UseRealTime()
140  // ThreadPerCpu poorly handles non-power-of-two CPU counts.
141  ->Threads(1)
142  ->Threads(2)
143  ->Threads(4)
144  ->Threads(6)
145  ->Threads(8)
146  ->Threads(12)
147  ->Threads(16)
148  ->Threads(24)
149  ->Threads(32)
150  ->Threads(48)
151  ->Threads(64)
152  ->Threads(96)
153  ->Threads(128)
154  ->Threads(192)
155  ->Threads(256)
156  // Some empirically chosen amounts of work in critical section.
157  // 1 is low contention, 200 is high contention and few values in between.
158  ->Arg(1)
159  ->Arg(20)
160  ->Arg(50)
161  ->Arg(200);
162 
163 // Measure the overhead of conditions on mutex release (when they must be
164 // evaluated). Mutex has (some) support for equivalence classes allowing
165 // Conditions with the same function/argument to potentially not be multiply
166 // evaluated.
167 //
168 // num_classes==0 is used for the special case of every waiter being distinct.
169 void BM_ConditionWaiters(benchmark::State& state) {
170  int num_classes = state.range(0);
171  int num_waiters = state.range(1);
172 
173  struct Helper {
174  static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
175  init->DecrementCount();
177  static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
178  m->Unlock();
179  }
180  };
181 
182  if (num_classes == 0) {
183  // No equivalence classes.
184  num_classes = num_waiters;
185  }
186 
187  absl::BlockingCounter init(num_waiters);
188  absl::Mutex mu;
189  std::vector<int> equivalence_classes(num_classes, 1);
190 
191  // Must be declared last to be destroyed first.
193 
194  for (int i = 0; i < num_waiters; i++) {
195  // Mutex considers Conditions with the same function and argument
196  // to be equivalent.
197  pool.Schedule([&, i] {
198  Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
199  });
200  }
201  init.Wait();
202 
203  for (auto _ : state) {
204  mu.Lock();
205  mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.
206  }
207 
208  mu.Lock();
209  for (int i = 0; i < num_classes; i++) {
210  equivalence_classes[i] = 0;
211  }
212  mu.Unlock();
213 }
214 
215 // Some configurations have higher thread limits than others.
216 #if defined(__linux__) && !defined(THREAD_SANITIZER)
217 constexpr int kMaxConditionWaiters = 8192;
218 #else
219 constexpr int kMaxConditionWaiters = 1024;
220 #endif
221 BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
222 
223 } // namespace
int v
Definition: variant_test.cc:81
void LockWhen(const Condition &cond) EXCLUSIVE_LOCK_FUNCTION()
char * end
void Unlock() UNLOCK_FUNCTION()
static char data[kDataSize]
Definition: city_test.cc:31
void Lock() EXCLUSIVE_LOCK_FUNCTION()


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:19:57