string_view_benchmark.cc
Go to the documentation of this file.
00001 // Copyright 2018 The Abseil Authors.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //      https://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 #include "absl/strings/string_view.h"
00016 
00017 #include <algorithm>
00018 #include <cstdint>
00019 #include <map>
00020 #include <random>
00021 #include <string>
00022 #include <unordered_set>
00023 #include <vector>
00024 
00025 #include "benchmark/benchmark.h"
00026 #include "absl/base/attributes.h"
00027 #include "absl/base/internal/raw_logging.h"
00028 #include "absl/base/macros.h"
00029 #include "absl/strings/str_cat.h"
00030 
00031 namespace {
00032 
00033 // Provide a forcibly out-of-line wrapper for operator== that can be used in
00034 // benchmarks to measure the impact of inlining.
00035 ABSL_ATTRIBUTE_NOINLINE
00036 bool NonInlinedEq(absl::string_view a, absl::string_view b) { return a == b; }
00037 
00038 // We use functions that cannot be inlined to perform the comparison loops so
00039 // that inlining of the operator== can't optimize away *everything*.
00040 ABSL_ATTRIBUTE_NOINLINE
00041 void DoEqualityComparisons(benchmark::State& state, absl::string_view a,
00042                            absl::string_view b) {
00043   for (auto _ : state) {
00044     benchmark::DoNotOptimize(a == b);
00045   }
00046 }
00047 
00048 void BM_EqualIdentical(benchmark::State& state) {
00049   std::string x(state.range(0), 'a');
00050   DoEqualityComparisons(state, x, x);
00051 }
00052 BENCHMARK(BM_EqualIdentical)->DenseRange(0, 3)->Range(4, 1 << 10);
00053 
00054 void BM_EqualSame(benchmark::State& state) {
00055   std::string x(state.range(0), 'a');
00056   std::string y = x;
00057   DoEqualityComparisons(state, x, y);
00058 }
00059 BENCHMARK(BM_EqualSame)
00060     ->DenseRange(0, 10)
00061     ->Arg(20)
00062     ->Arg(40)
00063     ->Arg(70)
00064     ->Arg(110)
00065     ->Range(160, 4096);
00066 
00067 void BM_EqualDifferent(benchmark::State& state) {
00068   const int len = state.range(0);
00069   std::string x(len, 'a');
00070   std::string y = x;
00071   if (len > 0) {
00072     y[len - 1] = 'b';
00073   }
00074   DoEqualityComparisons(state, x, y);
00075 }
00076 BENCHMARK(BM_EqualDifferent)->DenseRange(0, 3)->Range(4, 1 << 10);
00077 
00078 // This benchmark is intended to check that important simplifications can be
00079 // made with absl::string_view comparisons against constant strings. The idea is
00080 // that if constant strings cause redundant components of the comparison, the
00081 // compiler should detect and eliminate them. Here we use 8 different strings,
00082 // each with the same size. Provided our comparison makes the implementation
00083 // inline-able by the compiler, it should fold all of these away into a single
00084 // size check once per loop iteration.
00085 ABSL_ATTRIBUTE_NOINLINE
00086 void DoConstantSizeInlinedEqualityComparisons(benchmark::State& state,
00087                                               absl::string_view a) {
00088   for (auto _ : state) {
00089     benchmark::DoNotOptimize(a == "aaa");
00090     benchmark::DoNotOptimize(a == "bbb");
00091     benchmark::DoNotOptimize(a == "ccc");
00092     benchmark::DoNotOptimize(a == "ddd");
00093     benchmark::DoNotOptimize(a == "eee");
00094     benchmark::DoNotOptimize(a == "fff");
00095     benchmark::DoNotOptimize(a == "ggg");
00096     benchmark::DoNotOptimize(a == "hhh");
00097   }
00098 }
00099 void BM_EqualConstantSizeInlined(benchmark::State& state) {
00100   std::string x(state.range(0), 'a');
00101   DoConstantSizeInlinedEqualityComparisons(state, x);
00102 }
00103 // We only need to check for size of 3, and <> 3 as this benchmark only has to
00104 // do with size differences.
00105 BENCHMARK(BM_EqualConstantSizeInlined)->DenseRange(2, 4);
00106 
00107 // This benchmark exists purely to give context to the above timings: this is
00108 // what they would look like if the compiler is completely unable to simplify
00109 // between two comparisons when they are comparing against constant strings.
00110 ABSL_ATTRIBUTE_NOINLINE
00111 void DoConstantSizeNonInlinedEqualityComparisons(benchmark::State& state,
00112                                                  absl::string_view a) {
00113   for (auto _ : state) {
00114     // Force these out-of-line to compare with the above function.
00115     benchmark::DoNotOptimize(NonInlinedEq(a, "aaa"));
00116     benchmark::DoNotOptimize(NonInlinedEq(a, "bbb"));
00117     benchmark::DoNotOptimize(NonInlinedEq(a, "ccc"));
00118     benchmark::DoNotOptimize(NonInlinedEq(a, "ddd"));
00119     benchmark::DoNotOptimize(NonInlinedEq(a, "eee"));
00120     benchmark::DoNotOptimize(NonInlinedEq(a, "fff"));
00121     benchmark::DoNotOptimize(NonInlinedEq(a, "ggg"));
00122     benchmark::DoNotOptimize(NonInlinedEq(a, "hhh"));
00123   }
00124 }
00125 
00126 void BM_EqualConstantSizeNonInlined(benchmark::State& state) {
00127   std::string x(state.range(0), 'a');
00128   DoConstantSizeNonInlinedEqualityComparisons(state, x);
00129 }
00130 // We only need to check for size of 3, and <> 3 as this benchmark only has to
00131 // do with size differences.
00132 BENCHMARK(BM_EqualConstantSizeNonInlined)->DenseRange(2, 4);
00133 
00134 void BM_CompareSame(benchmark::State& state) {
00135   const int len = state.range(0);
00136   std::string x;
00137   for (int i = 0; i < len; i++) {
00138     x += 'a';
00139   }
00140   std::string y = x;
00141   absl::string_view a = x;
00142   absl::string_view b = y;
00143 
00144   for (auto _ : state) {
00145     benchmark::DoNotOptimize(a.compare(b));
00146   }
00147 }
00148 BENCHMARK(BM_CompareSame)->DenseRange(0, 3)->Range(4, 1 << 10);
00149 
00150 void BM_find_string_view_len_one(benchmark::State& state) {
00151   std::string haystack(state.range(0), '0');
00152   absl::string_view s(haystack);
00153   for (auto _ : state) {
00154     benchmark::DoNotOptimize(s.find("x"));  // not present; length 1
00155   }
00156 }
00157 BENCHMARK(BM_find_string_view_len_one)->Range(1, 1 << 20);
00158 
00159 void BM_find_string_view_len_two(benchmark::State& state) {
00160   std::string haystack(state.range(0), '0');
00161   absl::string_view s(haystack);
00162   for (auto _ : state) {
00163     benchmark::DoNotOptimize(s.find("xx"));  // not present; length 2
00164   }
00165 }
00166 BENCHMARK(BM_find_string_view_len_two)->Range(1, 1 << 20);
00167 
00168 void BM_find_one_char(benchmark::State& state) {
00169   std::string haystack(state.range(0), '0');
00170   absl::string_view s(haystack);
00171   for (auto _ : state) {
00172     benchmark::DoNotOptimize(s.find('x'));  // not present
00173   }
00174 }
00175 BENCHMARK(BM_find_one_char)->Range(1, 1 << 20);
00176 
00177 void BM_rfind_one_char(benchmark::State& state) {
00178   std::string haystack(state.range(0), '0');
00179   absl::string_view s(haystack);
00180   for (auto _ : state) {
00181     benchmark::DoNotOptimize(s.rfind('x'));  // not present
00182   }
00183 }
00184 BENCHMARK(BM_rfind_one_char)->Range(1, 1 << 20);
00185 
00186 void BM_worst_case_find_first_of(benchmark::State& state, int haystack_len) {
00187   const int needle_len = state.range(0);
00188   std::string needle;
00189   for (int i = 0; i < needle_len; ++i) {
00190     needle += 'a' + i;
00191   }
00192   std::string haystack(haystack_len, '0');  // 1000 zeros.
00193 
00194   absl::string_view s(haystack);
00195   for (auto _ : state) {
00196     benchmark::DoNotOptimize(s.find_first_of(needle));
00197   }
00198 }
00199 
00200 void BM_find_first_of_short(benchmark::State& state) {
00201   BM_worst_case_find_first_of(state, 10);
00202 }
00203 
00204 void BM_find_first_of_medium(benchmark::State& state) {
00205   BM_worst_case_find_first_of(state, 100);
00206 }
00207 
00208 void BM_find_first_of_long(benchmark::State& state) {
00209   BM_worst_case_find_first_of(state, 1000);
00210 }
00211 
00212 BENCHMARK(BM_find_first_of_short)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);
00213 BENCHMARK(BM_find_first_of_medium)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);
00214 BENCHMARK(BM_find_first_of_long)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);
00215 
00216 struct EasyMap : public std::map<absl::string_view, uint64_t> {
00217   explicit EasyMap(size_t) {}
00218 };
00219 
00220 // This templated benchmark helper function is intended to stress operator== or
00221 // operator< in a realistic test.  It surely isn't entirely realistic, but it's
00222 // a start.  The test creates a map of type Map, a template arg, and populates
00223 // it with table_size key/value pairs. Each key has WordsPerKey words.  After
00224 // creating the map, a number of lookups are done in random order.  Some keys
00225 // are used much more frequently than others in this phase of the test.
00226 template <typename Map, int WordsPerKey>
00227 void StringViewMapBenchmark(benchmark::State& state) {
00228   const int table_size = state.range(0);
00229   const double kFractionOfKeysThatAreHot = 0.2;
00230   const int kNumLookupsOfHotKeys = 20;
00231   const int kNumLookupsOfColdKeys = 1;
00232   const char* words[] = {"the",   "quick",  "brown",    "fox",      "jumped",
00233                          "over",  "the",    "lazy",     "dog",      "and",
00234                          "found", "a",      "large",    "mushroom", "and",
00235                          "a",     "couple", "crickets", "eating",   "pie"};
00236   // Create some keys that consist of words in random order.
00237   std::random_device r;
00238   std::seed_seq seed({r(), r(), r(), r(), r(), r(), r(), r()});
00239   std::mt19937 rng(seed);
00240   std::vector<std::string> keys(table_size);
00241   std::vector<int> all_indices;
00242   const int kBlockSize = 1 << 12;
00243   std::unordered_set<std::string> t(kBlockSize);
00244   std::uniform_int_distribution<int> uniform(0, ABSL_ARRAYSIZE(words) - 1);
00245   for (int i = 0; i < table_size; i++) {
00246     all_indices.push_back(i);
00247     do {
00248       keys[i].clear();
00249       for (int j = 0; j < WordsPerKey; j++) {
00250         absl::StrAppend(&keys[i], j > 0 ? " " : "", words[uniform(rng)]);
00251       }
00252     } while (!t.insert(keys[i]).second);
00253   }
00254 
00255   // Create a list of strings to lookup: a permutation of the array of
00256   // keys we just created, with repeats.  "Hot" keys get repeated more.
00257   std::shuffle(all_indices.begin(), all_indices.end(), rng);
00258   const int num_hot = table_size * kFractionOfKeysThatAreHot;
00259   const int num_cold = table_size - num_hot;
00260   std::vector<int> hot_indices(all_indices.begin(),
00261                                all_indices.begin() + num_hot);
00262   std::vector<int> indices;
00263   for (int i = 0; i < kNumLookupsOfColdKeys; i++) {
00264     indices.insert(indices.end(), all_indices.begin(), all_indices.end());
00265   }
00266   for (int i = 0; i < kNumLookupsOfHotKeys - kNumLookupsOfColdKeys; i++) {
00267     indices.insert(indices.end(), hot_indices.begin(), hot_indices.end());
00268   }
00269   std::shuffle(indices.begin(), indices.end(), rng);
00270   ABSL_RAW_CHECK(
00271       num_cold * kNumLookupsOfColdKeys + num_hot * kNumLookupsOfHotKeys ==
00272           indices.size(),
00273       "");
00274   // After constructing the array we probe it with absl::string_views built from
00275   // test_strings.  This means operator== won't see equal pointers, so
00276   // it'll have to check for equal lengths and equal characters.
00277   std::vector<std::string> test_strings(indices.size());
00278   for (int i = 0; i < indices.size(); i++) {
00279     test_strings[i] = keys[indices[i]];
00280   }
00281 
00282   // Run the benchmark. It includes map construction but is mostly
00283   // map lookups.
00284   for (auto _ : state) {
00285     Map h(table_size);
00286     for (int i = 0; i < table_size; i++) {
00287       h[keys[i]] = i * 2;
00288     }
00289     ABSL_RAW_CHECK(h.size() == table_size, "");
00290     uint64_t sum = 0;
00291     for (int i = 0; i < indices.size(); i++) {
00292       sum += h[test_strings[i]];
00293     }
00294     benchmark::DoNotOptimize(sum);
00295   }
00296 }
00297 
00298 void BM_StdMap_4(benchmark::State& state) {
00299   StringViewMapBenchmark<EasyMap, 4>(state);
00300 }
00301 BENCHMARK(BM_StdMap_4)->Range(1 << 10, 1 << 16);
00302 
00303 void BM_StdMap_8(benchmark::State& state) {
00304   StringViewMapBenchmark<EasyMap, 8>(state);
00305 }
00306 BENCHMARK(BM_StdMap_8)->Range(1 << 10, 1 << 16);
00307 
00308 void BM_CopyToStringNative(benchmark::State& state) {
00309   std::string src(state.range(0), 'x');
00310   absl::string_view sv(src);
00311   std::string dst;
00312   for (auto _ : state) {
00313     dst.assign(sv.begin(), sv.end());
00314   }
00315 }
00316 BENCHMARK(BM_CopyToStringNative)->Range(1 << 3, 1 << 12);
00317 
00318 void BM_AppendToStringNative(benchmark::State& state) {
00319   std::string src(state.range(0), 'x');
00320   absl::string_view sv(src);
00321   std::string dst;
00322   for (auto _ : state) {
00323     dst.clear();
00324     dst.insert(dst.end(), sv.begin(), sv.end());
00325   }
00326 }
00327 BENCHMARK(BM_AppendToStringNative)->Range(1 << 3, 1 << 12);
00328 
00329 }  // namespace


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15