abseil-cpp/absl/random/internal/chi_square_test.cc
Go to the documentation of this file.
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "absl/random/internal/chi_square.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <vector>

#include "gtest/gtest.h"
#include "absl/base/macros.h"

using absl::random_internal::ChiSquare;
using absl::random_internal::ChiSquarePValue;
using absl::random_internal::ChiSquareValue;
using absl::random_internal::ChiSquareWithExpected;
26 
31 
32 namespace {
33 
35  struct {
36  int line;
37  double chi_square;
38  int df;
39  double confidence;
40  } const specs[] = {
41  // Testing lookup at 1% confidence
42  {__LINE__, 0, 0, 0.01},
43  {__LINE__, 0.00016, 1, 0.01},
44  {__LINE__, 1.64650, 8, 0.01},
45  {__LINE__, 5.81221, 16, 0.01},
46  {__LINE__, 156.4319, 200, 0.01},
47  {__LINE__, 1121.3784, 1234, 0.01},
48  {__LINE__, 53557.1629, 54321, 0.01},
49  {__LINE__, 651662.6647, 654321, 0.01},
50 
51  // Testing lookup at 99% confidence
52  {__LINE__, 0, 0, 0.99},
53  {__LINE__, 6.635, 1, 0.99},
54  {__LINE__, 20.090, 8, 0.99},
55  {__LINE__, 32.000, 16, 0.99},
56  {__LINE__, 249.4456, 200, 0.99},
57  {__LINE__, 1131.1573, 1023, 0.99},
58  {__LINE__, 1352.5038, 1234, 0.99},
59  {__LINE__, 55090.7356, 54321, 0.99},
60  {__LINE__, 656985.1514, 654321, 0.99},
61 
62  // Testing lookup at 99.9% confidence
63  {__LINE__, 16.2659, 3, 0.999},
64  {__LINE__, 22.4580, 6, 0.999},
65  {__LINE__, 267.5409, 200, 0.999},
66  {__LINE__, 1168.5033, 1023, 0.999},
67  {__LINE__, 55345.1741, 54321, 0.999},
68  {__LINE__, 657861.7284, 654321, 0.999},
69  {__LINE__, 51.1772, 24, 0.999},
70  {__LINE__, 59.7003, 30, 0.999},
71  {__LINE__, 37.6984, 15, 0.999},
72  {__LINE__, 29.5898, 10, 0.999},
73  {__LINE__, 27.8776, 9, 0.999},
74 
75  // Testing lookup at random confidences
76  {__LINE__, 0.000157088, 1, 0.01},
77  {__LINE__, 5.31852, 2, 0.93},
78  {__LINE__, 1.92256, 4, 0.25},
79  {__LINE__, 10.7709, 13, 0.37},
80  {__LINE__, 26.2514, 17, 0.93},
81  {__LINE__, 36.4799, 29, 0.84},
82  {__LINE__, 25.818, 31, 0.27},
83  {__LINE__, 63.3346, 64, 0.50},
84  {__LINE__, 196.211, 128, 0.9999},
85  {__LINE__, 215.21, 243, 0.10},
86  {__LINE__, 285.393, 256, 0.90},
87  {__LINE__, 984.504, 1024, 0.1923},
88  {__LINE__, 2043.85, 2048, 0.4783},
89  {__LINE__, 48004.6, 48273, 0.194},
90  };
91  for (const auto& spec : specs) {
92  SCOPED_TRACE(spec.line);
93  // Verify all values are have at most a 1% relative error.
94  const double val = ChiSquareValue(spec.df, spec.confidence);
95  const double err = std::max(5e-6, spec.chi_square / 5e3); // 1 part in 5000
96  EXPECT_NEAR(spec.chi_square, val, err) << spec.line;
97  }
98 
99  // Relaxed test for extreme values, from
100  // http://www.ciphersbyritter.com/JAVASCRP/NORMCHIK.HTM#ChiSquare
101  EXPECT_NEAR(49.2680, ChiSquareValue(100, 1e-6), 5); // 0.000'005 mark
102  EXPECT_NEAR(123.499, ChiSquareValue(200, 1e-6), 5); // 0.000'005 mark
103 
104  EXPECT_NEAR(149.449, ChiSquareValue(100, 0.999), 0.01);
105  EXPECT_NEAR(161.318, ChiSquareValue(100, 0.9999), 0.01);
106  EXPECT_NEAR(172.098, ChiSquareValue(100, 0.99999), 0.01);
107 
108  EXPECT_NEAR(381.426, ChiSquareValue(300, 0.999), 0.05);
109  EXPECT_NEAR(399.756, ChiSquareValue(300, 0.9999), 0.1);
110  EXPECT_NEAR(416.126, ChiSquareValue(300, 0.99999), 0.2);
111 }
112 
113 TEST(ChiSquareTest, PValue) {
114  struct {
115  int line;
116  double pval;
117  double chi_square;
118  int df;
119  } static const specs[] = {
120  {__LINE__, 1, 0, 0},
121  {__LINE__, 0, 0.001, 0},
122  {__LINE__, 1.000, 0, 453},
123  {__LINE__, 0.134471, 7972.52, 7834},
124  {__LINE__, 0.203922, 28.32, 23},
125  {__LINE__, 0.737171, 48274, 48472},
126  {__LINE__, 0.444146, 583.1234, 579},
127  {__LINE__, 0.294814, 138.2, 130},
128  {__LINE__, 0.0816532, 12.63, 7},
129  {__LINE__, 0, 682.32, 67},
130  {__LINE__, 0.49405, 999, 999},
131  {__LINE__, 1.000, 0, 9999},
132  {__LINE__, 0.997477, 0.00001, 1},
133  {__LINE__, 0, 5823.21, 5040},
134  };
135  for (const auto& spec : specs) {
136  SCOPED_TRACE(spec.line);
137  const double pval = ChiSquarePValue(spec.chi_square, spec.df);
138  EXPECT_NEAR(spec.pval, pval, 1e-3);
139  }
140 }
141 
142 TEST(ChiSquareTest, CalcChiSquare) {
143  struct {
144  int line;
145  std::vector<int> expected;
146  std::vector<int> actual;
147  } const specs[] = {
148  {__LINE__,
149  {56, 234, 76, 1, 546, 1, 87, 345, 1, 234},
150  {2, 132, 4, 43, 234, 8, 345, 8, 236, 56}},
151  {__LINE__,
152  {123, 36, 234, 367, 345, 2, 456, 567, 234, 567},
153  {123, 56, 2345, 8, 345, 8, 2345, 23, 48, 267}},
154  {__LINE__,
155  {123, 234, 345, 456, 567, 678, 789, 890, 98, 76},
156  {123, 234, 345, 456, 567, 678, 789, 890, 98, 76}},
157  {__LINE__, {3, 675, 23, 86, 2, 8, 2}, {456, 675, 23, 86, 23, 65, 2}},
158  {__LINE__, {1}, {23}},
159  };
160  for (const auto& spec : specs) {
161  SCOPED_TRACE(spec.line);
162  double chi_square = 0;
163  for (int i = 0; i < spec.expected.size(); ++i) {
164  const double diff = spec.actual[i] - spec.expected[i];
165  chi_square += (diff * diff) / spec.expected[i];
166  }
167  EXPECT_NEAR(chi_square,
168  ChiSquare(std::begin(spec.actual), std::end(spec.actual),
169  std::begin(spec.expected), std::end(spec.expected)),
170  1e-5);
171  }
172 }
173 
174 TEST(ChiSquareTest, CalcChiSquareInt64) {
175  const int64_t data[3] = {910293487, 910292491, 910216780};
176  // $ python -c "import scipy.stats
177  // > print scipy.stats.chisquare([910293487, 910292491, 910216780])[0]"
178  // 4.25410123524
179  double sum = std::accumulate(std::begin(data), std::end(data), double{0});
180  size_t n = std::distance(std::begin(data), std::end(data));
182  EXPECT_NEAR(4.254101, a, 1e-6);
183 
184  // ... Or with known values.
185  double b =
187  EXPECT_NEAR(4.254101, b, 1e-6);
188 }
189 
190 TEST(ChiSquareTest, TableData) {
191  // Test data from
192  // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
193  // 0.90 0.95 0.975 0.99 0.999
194  const double data[100][5] = {
195  /* 1*/ {2.706, 3.841, 5.024, 6.635, 10.828},
196  /* 2*/ {4.605, 5.991, 7.378, 9.210, 13.816},
197  /* 3*/ {6.251, 7.815, 9.348, 11.345, 16.266},
198  /* 4*/ {7.779, 9.488, 11.143, 13.277, 18.467},
199  /* 5*/ {9.236, 11.070, 12.833, 15.086, 20.515},
200  /* 6*/ {10.645, 12.592, 14.449, 16.812, 22.458},
201  /* 7*/ {12.017, 14.067, 16.013, 18.475, 24.322},
202  /* 8*/ {13.362, 15.507, 17.535, 20.090, 26.125},
203  /* 9*/ {14.684, 16.919, 19.023, 21.666, 27.877},
204  /*10*/ {15.987, 18.307, 20.483, 23.209, 29.588},
205  /*11*/ {17.275, 19.675, 21.920, 24.725, 31.264},
206  /*12*/ {18.549, 21.026, 23.337, 26.217, 32.910},
207  /*13*/ {19.812, 22.362, 24.736, 27.688, 34.528},
208  /*14*/ {21.064, 23.685, 26.119, 29.141, 36.123},
209  /*15*/ {22.307, 24.996, 27.488, 30.578, 37.697},
210  /*16*/ {23.542, 26.296, 28.845, 32.000, 39.252},
211  /*17*/ {24.769, 27.587, 30.191, 33.409, 40.790},
212  /*18*/ {25.989, 28.869, 31.526, 34.805, 42.312},
213  /*19*/ {27.204, 30.144, 32.852, 36.191, 43.820},
214  /*20*/ {28.412, 31.410, 34.170, 37.566, 45.315},
215  /*21*/ {29.615, 32.671, 35.479, 38.932, 46.797},
216  /*22*/ {30.813, 33.924, 36.781, 40.289, 48.268},
217  /*23*/ {32.007, 35.172, 38.076, 41.638, 49.728},
218  /*24*/ {33.196, 36.415, 39.364, 42.980, 51.179},
219  /*25*/ {34.382, 37.652, 40.646, 44.314, 52.620},
220  /*26*/ {35.563, 38.885, 41.923, 45.642, 54.052},
221  /*27*/ {36.741, 40.113, 43.195, 46.963, 55.476},
222  /*28*/ {37.916, 41.337, 44.461, 48.278, 56.892},
223  /*29*/ {39.087, 42.557, 45.722, 49.588, 58.301},
224  /*30*/ {40.256, 43.773, 46.979, 50.892, 59.703},
225  /*31*/ {41.422, 44.985, 48.232, 52.191, 61.098},
226  /*32*/ {42.585, 46.194, 49.480, 53.486, 62.487},
227  /*33*/ {43.745, 47.400, 50.725, 54.776, 63.870},
228  /*34*/ {44.903, 48.602, 51.966, 56.061, 65.247},
229  /*35*/ {46.059, 49.802, 53.203, 57.342, 66.619},
230  /*36*/ {47.212, 50.998, 54.437, 58.619, 67.985},
231  /*37*/ {48.363, 52.192, 55.668, 59.893, 69.347},
232  /*38*/ {49.513, 53.384, 56.896, 61.162, 70.703},
233  /*39*/ {50.660, 54.572, 58.120, 62.428, 72.055},
234  /*40*/ {51.805, 55.758, 59.342, 63.691, 73.402},
235  /*41*/ {52.949, 56.942, 60.561, 64.950, 74.745},
236  /*42*/ {54.090, 58.124, 61.777, 66.206, 76.084},
237  /*43*/ {55.230, 59.304, 62.990, 67.459, 77.419},
238  /*44*/ {56.369, 60.481, 64.201, 68.710, 78.750},
239  /*45*/ {57.505, 61.656, 65.410, 69.957, 80.077},
240  /*46*/ {58.641, 62.830, 66.617, 71.201, 81.400},
241  /*47*/ {59.774, 64.001, 67.821, 72.443, 82.720},
242  /*48*/ {60.907, 65.171, 69.023, 73.683, 84.037},
243  /*49*/ {62.038, 66.339, 70.222, 74.919, 85.351},
244  /*50*/ {63.167, 67.505, 71.420, 76.154, 86.661},
245  /*51*/ {64.295, 68.669, 72.616, 77.386, 87.968},
246  /*52*/ {65.422, 69.832, 73.810, 78.616, 89.272},
247  /*53*/ {66.548, 70.993, 75.002, 79.843, 90.573},
248  /*54*/ {67.673, 72.153, 76.192, 81.069, 91.872},
249  /*55*/ {68.796, 73.311, 77.380, 82.292, 93.168},
250  /*56*/ {69.919, 74.468, 78.567, 83.513, 94.461},
251  /*57*/ {71.040, 75.624, 79.752, 84.733, 95.751},
252  /*58*/ {72.160, 76.778, 80.936, 85.950, 97.039},
253  /*59*/ {73.279, 77.931, 82.117, 87.166, 98.324},
254  /*60*/ {74.397, 79.082, 83.298, 88.379, 99.607},
255  /*61*/ {75.514, 80.232, 84.476, 89.591, 100.888},
256  /*62*/ {76.630, 81.381, 85.654, 90.802, 102.166},
257  /*63*/ {77.745, 82.529, 86.830, 92.010, 103.442},
258  /*64*/ {78.860, 83.675, 88.004, 93.217, 104.716},
259  /*65*/ {79.973, 84.821, 89.177, 94.422, 105.988},
260  /*66*/ {81.085, 85.965, 90.349, 95.626, 107.258},
261  /*67*/ {82.197, 87.108, 91.519, 96.828, 108.526},
262  /*68*/ {83.308, 88.250, 92.689, 98.028, 109.791},
263  /*69*/ {84.418, 89.391, 93.856, 99.228, 111.055},
264  /*70*/ {85.527, 90.531, 95.023, 100.425, 112.317},
265  /*71*/ {86.635, 91.670, 96.189, 101.621, 113.577},
266  /*72*/ {87.743, 92.808, 97.353, 102.816, 114.835},
267  /*73*/ {88.850, 93.945, 98.516, 104.010, 116.092},
268  /*74*/ {89.956, 95.081, 99.678, 105.202, 117.346},
269  /*75*/ {91.061, 96.217, 100.839, 106.393, 118.599},
270  /*76*/ {92.166, 97.351, 101.999, 107.583, 119.850},
271  /*77*/ {93.270, 98.484, 103.158, 108.771, 121.100},
272  /*78*/ {94.374, 99.617, 104.316, 109.958, 122.348},
273  /*79*/ {95.476, 100.749, 105.473, 111.144, 123.594},
274  /*80*/ {96.578, 101.879, 106.629, 112.329, 124.839},
275  /*81*/ {97.680, 103.010, 107.783, 113.512, 126.083},
276  /*82*/ {98.780, 104.139, 108.937, 114.695, 127.324},
277  /*83*/ {99.880, 105.267, 110.090, 115.876, 128.565},
278  /*84*/ {100.980, 106.395, 111.242, 117.057, 129.804},
279  /*85*/ {102.079, 107.522, 112.393, 118.236, 131.041},
280  /*86*/ {103.177, 108.648, 113.544, 119.414, 132.277},
281  /*87*/ {104.275, 109.773, 114.693, 120.591, 133.512},
282  /*88*/ {105.372, 110.898, 115.841, 121.767, 134.746},
283  /*89*/ {106.469, 112.022, 116.989, 122.942, 135.978},
284  /*90*/ {107.565, 113.145, 118.136, 124.116, 137.208},
285  /*91*/ {108.661, 114.268, 119.282, 125.289, 138.438},
286  /*92*/ {109.756, 115.390, 120.427, 126.462, 139.666},
287  /*93*/ {110.850, 116.511, 121.571, 127.633, 140.893},
288  /*94*/ {111.944, 117.632, 122.715, 128.803, 142.119},
289  /*95*/ {113.038, 118.752, 123.858, 129.973, 143.344},
290  /*96*/ {114.131, 119.871, 125.000, 131.141, 144.567},
291  /*97*/ {115.223, 120.990, 126.141, 132.309, 145.789},
292  /*98*/ {116.315, 122.108, 127.282, 133.476, 147.010},
293  /*99*/ {117.407, 123.225, 128.422, 134.642, 148.230},
294  /*100*/ {118.498, 124.342, 129.561, 135.807, 149.449}
295  };
296 
297  // 0.90 0.95 0.975 0.99 0.999
298  for (int i = 0; i < ABSL_ARRAYSIZE(data); i++) {
299  const double E = 0.0001;
300  EXPECT_NEAR(ChiSquarePValue(data[i][0], i + 1), 0.10, E)
301  << i << " " << data[i][0];
302  EXPECT_NEAR(ChiSquarePValue(data[i][1], i + 1), 0.05, E)
303  << i << " " << data[i][1];
304  EXPECT_NEAR(ChiSquarePValue(data[i][2], i + 1), 0.025, E)
305  << i << " " << data[i][2];
306  EXPECT_NEAR(ChiSquarePValue(data[i][3], i + 1), 0.01, E)
307  << i << " " << data[i][3];
308  EXPECT_NEAR(ChiSquarePValue(data[i][4], i + 1), 0.001, E)
309  << i << " " << data[i][4];
310 
311  const double F = 0.1;
312  EXPECT_NEAR(ChiSquareValue(i + 1, 0.90), data[i][0], F) << i;
313  EXPECT_NEAR(ChiSquareValue(i + 1, 0.95), data[i][1], F) << i;
314  EXPECT_NEAR(ChiSquareValue(i + 1, 0.975), data[i][2], F) << i;
315  EXPECT_NEAR(ChiSquareValue(i + 1, 0.99), data[i][3], F) << i;
316  EXPECT_NEAR(ChiSquareValue(i + 1, 0.999), data[i][4], F) << i;
317  }
318 }
319 
320 TEST(ChiSquareTest, ChiSquareTwoIterator) {
321  // Test data from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
322  // Null-hypothesis: This data is normally distributed.
323  const int counts[10] = {6, 6, 18, 33, 38, 38, 28, 21, 9, 3};
324  const double expected[10] = {4.6, 8.8, 18.4, 30.0, 38.2,
325  38.2, 30.0, 18.4, 8.8, 4.6};
326  double chi_square = ChiSquare(std::begin(counts), std::end(counts),
327  std::begin(expected), std::end(expected));
328  EXPECT_NEAR(chi_square, 2.69, 0.001);
329 
330  // Degrees of freedom: 10 bins. two estimated parameters. = 10 - 2 - 1.
331  const int dof = 7;
332  // The critical value of 7, 95% => 14.067 (see above test)
333  double p_value_05 = ChiSquarePValue(14.067, dof);
334  EXPECT_NEAR(p_value_05, 0.05, 0.001); // 95%-ile p-value
335 
336  double p_actual = ChiSquarePValue(chi_square, dof);
337  EXPECT_GT(p_actual, 0.05); // Accept the null hypothesis.
338 }
339 
340 TEST(ChiSquareTest, DiceRolls) {
341  // Assume we are testing 102 fair dice rolls.
342  // Null-hypothesis: This data is fairly distributed.
343  //
344  // The dof value of 4, @95% = 9.488 (see above test)
345  // The dof value of 5, @95% = 11.070
346  const int rolls[6] = {22, 11, 17, 14, 20, 18};
347  double sum = std::accumulate(std::begin(rolls), std::end(rolls), double{0});
348  size_t n = std::distance(std::begin(rolls), std::end(rolls));
349 
350  double a = ChiSquareWithExpected(std::begin(rolls), std::end(rolls), sum / n);
351  EXPECT_NEAR(a, 4.70588, 1e-5);
352  EXPECT_LT(a, ChiSquareValue(4, 0.95));
353 
354  double p_a = ChiSquarePValue(a, 4);
355  EXPECT_NEAR(p_a, 0.318828, 1e-5); // Accept the null hypothesis.
356 
357  double b = ChiSquareWithExpected(std::begin(rolls), std::end(rolls), 17.0);
358  EXPECT_NEAR(b, 4.70588, 1e-5);
359  EXPECT_LT(b, ChiSquareValue(5, 0.95));
360 
361  double p_b = ChiSquarePValue(b, 5);
362  EXPECT_NEAR(p_b, 0.4528180, 1e-5); // Accept the null hypothesis.
363 }
364 
365 } // namespace
absl::FormatConversionChar::E
@ E
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
grpc::testing::sum
double sum(const T &container, F functor)
Definition: test/cpp/qps/stats.h:30
EXPECT_GT
#define EXPECT_GT(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2036
error_ref_leak.err
err
Definition: error_ref_leak.py:35
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
ABSL_ARRAYSIZE
#define ABSL_ARRAYSIZE(array)
Definition: abseil-cpp/absl/base/macros.h:44
run_interop_tests.spec
def spec
Definition: run_interop_tests.py:1394
absl::random_internal::ChiSquareWithExpected
double ChiSquareWithExpected(Iterator begin, Iterator end, double expected)
Definition: abseil-cpp/absl/random/internal/chi_square.h:40
SCOPED_TRACE
#define SCOPED_TRACE(message)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2264
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
TEST
#define TEST(name, init_size,...)
Definition: arena_test.cc:75
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
bm_diff.diff
diff
Definition: bm_diff.py:274
absl::random_internal::ChiSquarePValue
double ChiSquarePValue(double chi_square, int dof)
Definition: abseil-cpp/absl/random/internal/chi_square.cc:157
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
F
#define F(b, c, d)
Definition: md4.c:112
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
EXPECT_LT
#define EXPECT_LT(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2032
accumulate
static void accumulate(upb_pb_encoder *e)
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.c:7694
regen-readme.line
line
Definition: regen-readme.py:30
absl::random_internal::ChiSquareValue
double ChiSquareValue(int dof, double p)
Definition: abseil-cpp/absl/random/internal/chi_square.cc:106
EXPECT_NEAR
#define EXPECT_NEAR(val1, val2, abs_error)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2143
Value
struct Value Value
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/protobuf.h:676
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
absl::random_internal::ChiSquare
double ChiSquare(Iterator it, Iterator end, Expected eit, Expected eend)
Definition: abseil-cpp/absl/random/internal/chi_square.h:56


grpc
Author(s):
autogenerated on Fri May 16 2025 02:57:53