15 #include "absl/random/zipf_distribution.h"
26 #include "gmock/gmock.h"
27 #include "gtest/gtest.h"
28 #include "absl/base/internal/raw_logging.h"
29 #include "absl/random/internal/chi_square.h"
30 #include "absl/random/internal/pcg_engine.h"
31 #include "absl/random/internal/sequence_urbg.h"
32 #include "absl/random/random.h"
33 #include "absl/strings/str_cat.h"
34 #include "absl/strings/str_replace.h"
35 #include "absl/strings/strip.h"
42 template <
typename IntType>
49 TYPED_TEST(ZipfDistributionTypedTest, SerializeTest) {
52 constexpr
int kCount = 1000;
54 for (
const auto& param : {
57 param_type(100, 3, 2),
62 const auto k = param.k();
63 const auto q = param.q();
64 const auto v = param.v();
96 auto sample_min =
after.max();
97 auto sample_max =
after.min();
98 for (
int i = 0;
i < kCount;
i++) {
102 if (sample > sample_max) sample_max = sample;
103 if (sample < sample_min) sample_min = sample;
106 absl::StrCat(
"Range: ", +sample_min,
", ", +sample_max));
112 ZipfModel(
size_t k,
double q,
double v) :
k_(
k), q_(
q), v_(
v) {}
114 double mean()
const {
return mean_; }
121 double PMF(
size_t i) {
return i >= hnq_.size() ? 0.0 : hnq_[
i] / sum_hnq_; }
124 double CDF(
size_t i) {
125 if (
i >= hnq_.size()) {
139 std::pair<size_t, size_t> InverseCDF(
double p) {
141 size_t max = hnq_.size();
173 double qm1 = q_ - 1.0;
174 double sum_hnq_m1 = 0;
175 for (
size_t i = 0;
i <
k_;
i++) {
177 const double x = v_ +
i;
181 (q_ == 2.0) ? (1.0 /
x)
182 : (q_ == 3.0) ? (1.0 / (
x *
x)) : std::pow(
x, -qm1);
187 (q_ == 2.0) ? (1.0 / (
x *
x))
188 : (q_ == 3.0) ? (1.0 / (
x *
x *
x)) : std::pow(
x, -q_);
191 if (
i > 1000 && hnq <= 1e-10) {
196 assert(sum_hnq_ > 0);
197 mean_ = sum_hnq_m1 / sum_hnq_;
206 std::vector<double> hnq_;
215 ZipfTest() : ZipfModel(GetParam().
k(), GetParam().
q(), GetParam().
v()) {}
223 TEST_P(ZipfTest, ChiSquaredTest) {
224 const auto& param = GetParam();
227 size_t trials = 10000;
230 std::vector<size_t> points;
231 std::vector<double> expected;
233 double last_cdf = 0.0;
235 for (
double p = 0.01;
p < 1.0;
p += 0.01) {
236 auto x = InverseCDF(
p);
237 if (points.empty() || points.back() <
x.second) {
238 const double p = CDF(
x.second);
239 points.push_back(
x.second);
240 double q =
p - last_cdf;
241 expected.push_back(q);
248 if (last_cdf < 0.999) {
250 double q = 1.0 - last_cdf;
251 expected.push_back(q);
257 expected.back() += (1.0 - last_cdf);
261 trials =
static_cast<size_t>(8.0 / min_p);
267 std::vector<int64_t> buckets(points.size(), 0);
271 for (
size_t i = 0;
i < trials;
i++) {
275 avg +=
static_cast<double>(
x);
277 static_cast<size_t>(
x));
280 avg = avg /
static_cast<double>(trials);
284 for (
auto& e : expected) {
290 const int dof =
static_cast<int>(expected.size()) - 1;
301 const double p_actual =
305 if (chi_square > threshold) {
307 for (
size_t i = 0;
i < expected.size();
i++) {
309 " vs. E=", expected[
i]));
315 chi_square,
" (", p_actual,
")"));
319 <<
" is above the threshold.";
323 std::vector<zipf_u64::param_type> GenParams() {
324 using param = zipf_u64::param_type;
325 const auto k = param().k();
326 const auto q = param().q();
327 const auto v = param().v();
329 return std::vector<zipf_u64::param_type>{
333 param(4, q,
v), param(1 << 4, q,
v), param(
k2, q,
v),
335 param(
k2, q, 0.5), param(
k2, q, 1.5), param(
k2, q, 2.5), param(
k2, q, 10),
337 param(
k2, 1.5,
v), param(
k2, 3,
v), param(
k2, 5,
v), param(
k2, 10,
v),
339 param(
k2, 1.5, 0.5), param(
k2, 3, 1.5), param(
k, 10, 10)};
343 const ::testing::TestParamInfo<zipf_u64::param_type>& info) {
344 const auto&
p = info.param;
354 TEST(ZipfDistributionTest, StabilityTest) {
358 {0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull,
359 0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull,
360 0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull,
361 0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull});
363 std::vector<int>
output(10);
368 [&] {
return dist(urbg); });
369 EXPECT_THAT(
output,
ElementsAre(10031, 0, 0, 3, 6, 0, 7, 47, 0, 0));
376 [&] {
return dist(urbg); });
377 EXPECT_THAT(
output,
ElementsAre(44, 0, 0, 0, 0, 1, 0, 1, 3, 0));
381 TEST(ZipfDistributionTest, AlgorithmBounds) {
386 const std::pair<uint64_t, int32_t> kInputs[] = {
387 {0xffffffffffffffff, 0x0}, {0x7fffffffffffffff, 0x0},
388 {0x3ffffffffffffffb, 0x1}, {0x1ffffffffffffffd, 0x4},
389 {0xffffffffffffffe, 0x9}, {0x7ffffffffffffff, 0x12},
390 {0x3ffffffffffffff, 0x25}, {0x1ffffffffffffff, 0x4c},
391 {0xffffffffffffff, 0x99}, {0x7fffffffffffff, 0x132},
392 {0x3fffffffffffff, 0x265}, {0x1fffffffffffff, 0x4cc},
393 {0xfffffffffffff, 0x999}, {0x7ffffffffffff, 0x1332},
394 {0x3ffffffffffff, 0x2665}, {0x1ffffffffffff, 0x4ccc},
395 {0xffffffffffff, 0x9998}, {0x7fffffffffff, 0x1332f},
396 {0x3fffffffffff, 0x2665a}, {0x1fffffffffff, 0x4cc9e},
397 {0xfffffffffff, 0x998e0}, {0x7ffffffffff, 0x133051},
398 {0x3ffffffffff, 0x265ae4}, {0x1ffffffffff, 0x4c9ed3},
399 {0xffffffffff, 0x98e223}, {0x7fffffffff, 0x13058c4},
400 {0x3fffffffff, 0x25b178e}, {0x1fffffffff, 0x4a062b2},
401 {0xfffffffff, 0x8ee23b8}, {0x7ffffffff, 0x10b21642},
402 {0x3ffffffff, 0x1d89d89d}, {0x1ffffffff, 0x2fffffff},
403 {0xffffffff, 0x45d1745d}, {0x7fffffff, 0x5a5a5a5a},
404 {0x3fffffff, 0x69ee5846}, {0x1fffffff, 0x73ecade3},
405 {0xfffffff, 0x79a9d260}, {0x7ffffff, 0x7cc0532b},
406 {0x3ffffff, 0x7e5ad146}, {0x1ffffff, 0x7f2c0bec},
407 {0xffffff, 0x7f95adef}, {0x7fffff, 0x7fcac0da},
408 {0x3fffff, 0x7fe55ae2}, {0x1fffff, 0x7ff2ac0e},
409 {0xfffff, 0x7ff955ae}, {0x7ffff, 0x7ffcaac1},
410 {0x3ffff, 0x7ffe555b}, {0x1ffff, 0x7fff2aac},
411 {0xffff, 0x7fff9556}, {0x7fff, 0x7fffcaab},
412 {0x3fff, 0x7fffe555}, {0x1fff, 0x7ffff2ab},
413 {0xfff, 0x7ffff955}, {0x7ff, 0x7ffffcab},
414 {0x3ff, 0x7ffffe55}, {0x1ff, 0x7fffff2b},
415 {0xff, 0x7fffff95}, {0x7f, 0x7fffffcb},
416 {0x3f, 0x7fffffe5}, {0x1f, 0x7ffffff3},
417 {0xf, 0x7ffffff9}, {0x7, 0x7ffffffd},
418 {0x3, 0x7ffffffe}, {0x1, 0x7fffffff},
421 for (
const auto&
instance : kInputs) {