mutex_benchmark.cc
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <mutex>  // NOLINT(build/c++11)
#include <vector>

#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/spinlock.h"
#include "absl/synchronization/blocking_counter.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"

namespace {

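// Measures the cost of a bare lock/unlock of a shared absl::Mutex, both with
// a single thread and with one thread per CPU.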
void BM_Mutex(benchmark::State& state) {
  static absl::Mutex* mu = new absl::Mutex;
  for (auto _ : state) {
    absl::MutexLock lock(mu);
  }
}
BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();

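// Spins for approximately `ns` nanoseconds, using CycleClock to convert the
// requested delay into clock cycles. The increment of *data (together with
// DoNotOptimize) keeps the busy loop from being optimized away.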
static void DelayNs(int64_t ns, int* data) {
  int64_t end = absl::base_internal::CycleClock::Now() +
                ns * absl::base_internal::CycleClock::Frequency() / 1e9;
  while (absl::base_internal::CycleClock::Now() < end) {
    ++(*data);
    benchmark::DoNotOptimize(*data);
  }
}

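// Minimal RAII adapter so the contended benchmark below can be templated over
// different mutex types; absl::Mutex and SpinLock both expose Lock()/Unlock().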
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  ~RaiiLocker() { mu_->Unlock(); }
 private:
  MutexType* mu_;
};

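// std::mutex uses lowercase lock()/unlock(), so it gets its own specialization.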
template <>
class RaiiLocker<std::mutex> {
 public:
  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
  ~RaiiLocker() { mu_->unlock(); }
 private:
  std::mutex* mu_;
};

template <typename MutexType>
void BM_Contended(benchmark::State& state) {
  struct Shared {
    MutexType mu;
    int data = 0;
  };
  static auto* shared = new Shared;
  int local = 0;
  for (auto _ : state) {
    // Here we model both local work outside of the critical section and some
    // work inside of the critical section. The idea is to capture more or
    // less realistic contention levels.
    // If contention is too low, the benchmark won't measure anything useful.
    // If contention is unrealistically high, the benchmark will favor
    // bad mutex implementations that block and otherwise distract threads
    // from the mutex and shared state as much as possible.
    // To achieve this, the amount of local work is multiplied by the number
    // of threads to keep the ratio between local work and the critical
    // section approximately constant regardless of the number of threads.
    DelayNs(100 * state.threads, &local);
    RaiiLocker<MutexType> locker(&shared->mu);
    DelayNs(state.range(0), &shared->data);
  }
}

BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

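// Example invocation (the binary name is illustrative), running only the
// contended benchmarks via Google Benchmark's --benchmark_filter flag:
//   ./mutex_benchmark --benchmark_filter='BM_Contended.*'
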
// Measures the overhead of Conditions on mutex release (when they must be
// evaluated).  Mutex has (some) support for equivalence classes, allowing
// Conditions with the same function/argument to avoid being evaluated
// multiple times.
//
// num_classes==0 is used for the special case of every waiter being distinct.
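// For example, two Conditions constructed from the same predicate function
// and the same argument pointer, e.g. absl::Condition(&Pred, &value) used
// twice, fall into one equivalence class (Pred and value are illustrative
// names).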
void BM_ConditionWaiters(benchmark::State& state) {
  int num_classes = state.range(0);
  int num_waiters = state.range(1);

  struct Helper {
    static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
      init->DecrementCount();
      m->LockWhen(absl::Condition(
          static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
      m->Unlock();
    }
  };

  if (num_classes == 0) {
    // No equivalence classes.
    num_classes = num_waiters;
  }

  absl::BlockingCounter init(num_waiters);
  absl::Mutex mu;
  std::vector<int> equivalence_classes(num_classes, 1);

  // Must be declared last to be destroyed first.
  absl::synchronization_internal::ThreadPool pool(num_waiters);

  for (int i = 0; i < num_waiters; i++) {
    // Mutex considers Conditions with the same function and argument
    // to be equivalent.
    pool.Schedule([&, i] {
      Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
    });
  }
  init.Wait();

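  // All waiters have checked in; they stay blocked in LockWhen until their
  // class's value is set to 0 in the cleanup below.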
  for (auto _ : state) {
    mu.Lock();
    mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
  }

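  // Satisfy every Condition so the waiters can acquire the mutex, return, and
  // let the ThreadPool destructor join them.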
  mu.Lock();
  for (int i = 0; i < num_classes; i++) {
    equivalence_classes[i] = 0;
  }
  mu.Unlock();
}

// Some configurations have higher thread limits than others.
#if defined(__linux__) && !defined(THREAD_SANITIZER)
constexpr int kMaxConditionWaiters = 8192;
#else
constexpr int kMaxConditionWaiters = 1024;
#endif
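// RangePair sweeps both arguments: state.range(0) = num_classes over [0, 2]
// and state.range(1) = num_waiters over [1, kMaxConditionWaiters].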
BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);

}  // namespace

