// abseil-cpp/absl/synchronization/mutex_benchmark.cc
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include <atomic>
#include <cstdint>
#include <limits>
#include <mutex>  // NOLINT(build/c++11)
#include <vector>

#include "absl/base/config.h"
#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/synchronization/blocking_counter.h"
#include "absl/synchronization/internal/per_thread_sem.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"
26 
27 namespace {
28 
29 void BM_Mutex(benchmark::State& state) {
30  static absl::Mutex* mu = new absl::Mutex;
31  for (auto _ : state) {
32  absl::MutexLock lock(mu);
33  }
34 }
35 BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
36 
37 static void DelayNs(int64_t ns, int* data) {
41  ++(*data);
43  }
44 }
45 
// RAII guard for any mutex type exposing Lock()/Unlock() (e.g. absl::Mutex,
// absl::base_internal::SpinLock). Acquires on construction, releases on
// destruction. Copying is deleted: a copyable lock guard would Unlock() the
// same mutex once per copy destroyed.
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  ~RaiiLocker() { mu_->Unlock(); }
  RaiiLocker(const RaiiLocker&) = delete;
  RaiiLocker& operator=(const RaiiLocker&) = delete;

 private:
  MutexType* mu_;  // Not owned; must outlive this guard.
};
54 
55 template <>
56 class RaiiLocker<std::mutex> {
57  public:
58  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
59  ~RaiiLocker() { mu_->unlock(); }
60  private:
61  std::mutex* mu_;
62 };
63 
64 // RAII object to change the Mutex priority of the running thread.
65 class ScopedThreadMutexPriority {
66  public:
67  explicit ScopedThreadMutexPriority(int priority) {
71  // Bump next_priority_read_cycles to the infinite future so that the
72  // implementation doesn't re-read the thread's actual scheduler priority
73  // and replace our temporary scoped priority.
76  }
77  ~ScopedThreadMutexPriority() {
78  // Reset the "next priority read time" back to the infinite past so that
79  // the next time the Mutex implementation wants to know this thread's
80  // priority, it re-reads it from the OS instead of using our overridden
81  // priority.
85  }
86 };
87 
88 void BM_MutexEnqueue(benchmark::State& state) {
89  // In the "multiple priorities" variant of the benchmark, one of the
90  // threads runs with Mutex priority 0 while the rest run at elevated priority.
91  // This benchmarks the performance impact of the presence of a low priority
92  // waiter when a higher priority waiter adds itself of the queue
93  // (b/175224064).
94  //
95  // NOTE: The actual scheduler priority is not modified in this benchmark:
96  // all of the threads get CPU slices with the same priority. Only the
97  // Mutex queueing behavior is modified.
98  const bool multiple_priorities = state.range(0);
99  ScopedThreadMutexPriority priority_setter(
100  (multiple_priorities && state.thread_index() != 0) ? 1 : 0);
101 
102  struct Shared {
103  absl::Mutex mu;
104  std::atomic<int> looping_threads{0};
105  std::atomic<int> blocked_threads{0};
106  std::atomic<bool> thread_has_mutex{false};
107  };
108  static Shared* shared = new Shared;
109 
110  // Set up 'blocked_threads' to count how many threads are currently blocked
111  // in Abseil synchronization code.
112  //
113  // NOTE: Blocking done within the Google Benchmark library itself (e.g.
114  // the barrier which synchronizes threads entering and exiting the benchmark
115  // loop) does _not_ get registered in this counter. This is because Google
116  // Benchmark uses its own synchronization primitives based on std::mutex, not
117  // Abseil synchronization primitives. If at some point the benchmark library
118  // merges into Abseil, this code may break.
120  &shared->blocked_threads);
121 
122  // The benchmark framework may run several iterations in the same process,
123  // reusing the same static-initialized 'shared' object. Given the semantics
124  // of the members, here, we expect everything to be reset to zero by the
125  // end of any iteration. Assert that's the case, just to be sure.
127  shared->looping_threads.load(std::memory_order_relaxed) == 0 &&
128  shared->blocked_threads.load(std::memory_order_relaxed) == 0 &&
129  !shared->thread_has_mutex.load(std::memory_order_relaxed),
130  "Shared state isn't zeroed at start of benchmark iteration");
131 
132  static constexpr int kBatchSize = 1000;
133  while (state.KeepRunningBatch(kBatchSize)) {
134  shared->looping_threads.fetch_add(1);
135  for (int i = 0; i < kBatchSize; i++) {
136  {
137  absl::MutexLock l(&shared->mu);
138  shared->thread_has_mutex.store(true, std::memory_order_relaxed);
139  // Spin until all other threads are either out of the benchmark loop
140  // or blocked on the mutex. This ensures that the mutex queue is kept
141  // at its maximal length to benchmark the performance of queueing on
142  // a highly contended mutex.
143  while (shared->looping_threads.load(std::memory_order_relaxed) -
144  shared->blocked_threads.load(std::memory_order_relaxed) !=
145  1) {
146  }
147  shared->thread_has_mutex.store(false);
148  }
149  // Spin until some other thread has acquired the mutex before we block
150  // again. This ensures that we always go through the slow (queueing)
151  // acquisition path rather than reacquiring the mutex we just released.
152  while (!shared->thread_has_mutex.load(std::memory_order_relaxed) &&
153  shared->looping_threads.load(std::memory_order_relaxed) > 1) {
154  }
155  }
156  // The benchmark framework uses a barrier to ensure that all of the threads
157  // complete their benchmark loop together before any of the threads exit
158  // the loop. So, we need to remove ourselves from the "looping threads"
159  // counter here before potentially blocking on that barrier. Otherwise,
160  // another thread spinning above might wait forever for this thread to
161  // block on the mutex while we in fact are waiting to exit.
162  shared->looping_threads.fetch_add(-1);
163  }
165  nullptr);
166 }
167 
// Exercise the enqueue path at several contention levels (from modest to
// heavily oversubscribed thread counts), both with uniform waiter priorities
// (arg=false) and with one priority-0 waiter among elevated ones (arg=true).
BENCHMARK(BM_MutexEnqueue)
    ->Threads(4)
    ->Threads(64)
    ->Threads(128)
    ->Threads(512)
    ->ArgName("multiple_priorities")
    ->Arg(false)
    ->Arg(true);
176 
177 template <typename MutexType>
178 void BM_Contended(benchmark::State& state) {
179  int priority = state.thread_index() % state.range(1);
180  ScopedThreadMutexPriority priority_setter(priority);
181 
182  struct Shared {
183  MutexType mu;
184  int data = 0;
185  };
186  static auto* shared = new Shared;
187  int local = 0;
188  for (auto _ : state) {
189  // Here we model both local work outside of the critical section as well as
190  // some work inside of the critical section. The idea is to capture some
191  // more or less realisitic contention levels.
192  // If contention is too low, the benchmark won't measure anything useful.
193  // If contention is unrealistically high, the benchmark will favor
194  // bad mutex implementations that block and otherwise distract threads
195  // from the mutex and shared state for as much as possible.
196  // To achieve this amount of local work is multiplied by number of threads
197  // to keep ratio between local work and critical section approximately
198  // equal regardless of number of threads.
199  DelayNs(100 * state.threads(), &local);
200  RaiiLocker<MutexType> locker(&shared->mu);
201  DelayNs(state.range(0), &shared->data);
202  }
203 }
204 void SetupBenchmarkArgs(benchmark::internal::Benchmark* bm,
205  bool do_test_priorities) {
206  const int max_num_priorities = do_test_priorities ? 2 : 1;
207  bm->UseRealTime()
208  // ThreadPerCpu poorly handles non-power-of-two CPU counts.
209  ->Threads(1)
210  ->Threads(2)
211  ->Threads(4)
212  ->Threads(6)
213  ->Threads(8)
214  ->Threads(12)
215  ->Threads(16)
216  ->Threads(24)
217  ->Threads(32)
218  ->Threads(48)
219  ->Threads(64)
220  ->Threads(96)
221  ->Threads(128)
222  ->Threads(192)
223  ->Threads(256)
224  ->ArgNames({"cs_ns", "num_prios"});
225  // Some empirically chosen amounts of work in critical section.
226  // 1 is low contention, 2000 is high contention and few values in between.
227  for (int critical_section_ns : {1, 20, 50, 200, 2000}) {
228  for (int num_priorities = 1; num_priorities <= max_num_priorities;
229  num_priorities++) {
230  bm->ArgPair(critical_section_ns, num_priorities);
231  }
232  }
233 }
234 
235 BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
236  ->Apply([](benchmark::internal::Benchmark* bm) {
237  SetupBenchmarkArgs(bm, /*do_test_priorities=*/true);
238  });
239 
241  ->Apply([](benchmark::internal::Benchmark* bm) {
242  SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);
243  });
244 
245 BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
246  ->Apply([](benchmark::internal::Benchmark* bm) {
247  SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);
248  });
249 
250 // Measure the overhead of conditions on mutex release (when they must be
251 // evaluated). Mutex has (some) support for equivalence classes allowing
252 // Conditions with the same function/argument to potentially not be multiply
253 // evaluated.
254 //
255 // num_classes==0 is used for the special case of every waiter being distinct.
256 void BM_ConditionWaiters(benchmark::State& state) {
257  int num_classes = state.range(0);
258  int num_waiters = state.range(1);
259 
260  struct Helper {
261  static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
262  init->DecrementCount();
263  m->LockWhen(absl::Condition(
264  static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
265  m->Unlock();
266  }
267  };
268 
269  if (num_classes == 0) {
270  // No equivalence classes.
271  num_classes = num_waiters;
272  }
273 
274  absl::BlockingCounter init(num_waiters);
275  absl::Mutex mu;
276  std::vector<int> equivalence_classes(num_classes, 1);
277 
278  // Must be declared last to be destroyed first.
280 
281  for (int i = 0; i < num_waiters; i++) {
282  // Mutex considers Conditions with the same function and argument
283  // to be equivalent.
284  pool.Schedule([&, i] {
285  Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
286  });
287  }
288  init.Wait();
289 
290  for (auto _ : state) {
291  mu.Lock();
292  mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.
293  }
294 
295  mu.Lock();
296  for (int i = 0; i < num_classes; i++) {
297  equivalence_classes[i] = 0;
298  }
299  mu.Unlock();
300 }
301 
302 // Some configurations have higher thread limits than others.
303 #if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)
304 constexpr int kMaxConditionWaiters = 8192;
305 #else
306 constexpr int kMaxConditionWaiters = 1024;
307 #endif
308 BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
309 
310 } // namespace
ABSL_RAW_CHECK
#define ABSL_RAW_CHECK(condition, message)
Definition: abseil-cpp/absl/base/internal/raw_logging.h:59
absl::synchronization_internal::GetOrCreateCurrentThreadIdentity
base_internal::ThreadIdentity * GetOrCreateCurrentThreadIdentity()
Definition: abseil-cpp/absl/synchronization/internal/create_thread_identity.h:43
init
const char * init
Definition: upb/upb/bindings/lua/main.c:49
absl::base_internal::ThreadIdentity
Definition: abseil-cpp/absl/base/internal/thread_identity.h:137
priority
int priority
Definition: abseil-cpp/absl/synchronization/internal/graphcycles.cc:286
benchmark::internal::Benchmark::ArgPair
Benchmark * ArgPair(int64_t x, int64_t y)
Definition: benchmark/include/benchmark/benchmark.h:870
absl::synchronization_internal::ThreadPool
Definition: third_party/abseil-cpp/absl/synchronization/internal/thread_pool.h:33
mutex
static uv_mutex_t mutex
Definition: threadpool.c:34
absl::Mutex
Definition: abseil-cpp/absl/synchronization/mutex.h:131
absl::base_internal::CycleClock::Frequency
static double Frequency()
Definition: abseil-cpp/absl/base/internal/cycleclock.cc:69
absl::base_internal::SpinLock
Definition: third_party/abseil-cpp/absl/base/internal/spinlock.h:52
benchmark::internal::Benchmark::Threads
Benchmark * Threads(int t)
Definition: benchmark/src/benchmark_register.cc:409
benchmark::DoNotOptimize
BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const &value)
Definition: benchmark/include/benchmark/benchmark.h:375
xds_manager.p
p
Definition: xds_manager.py:60
benchmark::internal::Benchmark::ArgNames
Benchmark * ArgNames(const std::vector< std::string > &names)
Definition: benchmark/src/benchmark_register.cc:298
mu_
Mutex mu_
Definition: oob_backend_metric.cc:115
absl::MutexLock
Definition: abseil-cpp/absl/synchronization/mutex.h:525
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
absl::synchronization_internal::PerThreadSem::SetThreadBlockedCounter
static void SetThreadBlockedCounter(std::atomic< int > *counter)
Definition: abseil-cpp/absl/synchronization/internal/per_thread_sem.cc:31
mu
Mutex mu
Definition: server_config_selector_filter.cc:74
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
BENCHMARK_TEMPLATE
#define BENCHMARK_TEMPLATE(n, a)
Definition: benchmark/include/benchmark/benchmark.h:1231
absl::base_internal::ThreadIdentity::per_thread_synch
PerThreadSynch per_thread_synch
Definition: abseil-cpp/absl/base/internal/thread_identity.h:142
gmock_output_test._
_
Definition: bloaty/third_party/googletest/googlemock/test/gmock_output_test.py:175
absl::BlockingCounter
Definition: abseil-cpp/absl/synchronization/blocking_counter.h:63
absl::base_internal::CycleClock::Now
static int64_t Now()
Definition: abseil-cpp/absl/base/internal/cycleclock.cc:63
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
absl::Condition
Definition: abseil-cpp/absl/synchronization/mutex.h:663
min
#define min(a, b)
Definition: qsort.h:83
benchmark::internal::Benchmark
Definition: benchmark/include/benchmark/benchmark.h:834
absl::base_internal::PerThreadSynch::next_priority_read_cycles
int64_t next_priority_read_cycles
Definition: abseil-cpp/absl/base/internal/thread_identity.h:128
benchmark::State
Definition: benchmark/include/benchmark/benchmark.h:503
google::protobuf.internal::Mutex
WrappedMutex Mutex
Definition: bloaty/third_party/protobuf/src/google/protobuf/stubs/mutex.h:113
std
Definition: grpcpp/impl/codegen/async_unary_call.h:407
MutexType
std::mutex MutexType
Definition: bloaty/third_party/re2/util/mutex.h:29
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
local
#define local
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:36
pool
InternalDescriptorPool * pool
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/protobuf.h:807
ns
static int64_t ns
Definition: bloaty/third_party/re2/util/benchmark.cc:43
BENCHMARK
#define BENCHMARK(n)
Definition: benchmark/include/benchmark/benchmark.h:1170
benchmark::internal::Benchmark::UseRealTime
Benchmark * UseRealTime()
Definition: benchmark/src/benchmark_register.cc:377
run_grpclb_interop_tests.l
dictionary l
Definition: run_grpclb_interop_tests.py:410
regress.m
m
Definition: regress/regress.py:25
absl::base_internal::PerThreadSynch::priority
int priority
Definition: abseil-cpp/absl/base/internal/thread_identity.h:94
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:31