exponential_distribution_test.cc (14588B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/random/exponential_distribution.h" 16 17 #include <algorithm> 18 #include <cfloat> 19 #include <cmath> 20 #include <cstddef> 21 #include <cstdint> 22 #include <iterator> 23 #include <limits> 24 #include <random> 25 #include <sstream> 26 #include <string> 27 #include <type_traits> 28 #include <vector> 29 30 #include "gmock/gmock.h" 31 #include "gtest/gtest.h" 32 #include "absl/base/macros.h" 33 #include "absl/log/log.h" 34 #include "absl/numeric/internal/representation.h" 35 #include "absl/random/internal/chi_square.h" 36 #include "absl/random/internal/distribution_test_util.h" 37 #include "absl/random/internal/pcg_engine.h" 38 #include "absl/random/internal/sequence_urbg.h" 39 #include "absl/random/random.h" 40 #include "absl/strings/str_cat.h" 41 #include "absl/strings/str_format.h" 42 #include "absl/strings/str_replace.h" 43 #include "absl/strings/strip.h" 44 45 namespace { 46 47 using absl::random_internal::kChiSquared; 48 49 template <typename RealType> 50 class ExponentialDistributionTypedTest : public ::testing::Test {}; 51 52 // double-double arithmetic is not supported well by either GCC or Clang; see 53 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99048, 54 // https://bugs.llvm.org/show_bug.cgi?id=49131, and 55 // https://bugs.llvm.org/show_bug.cgi?id=49132. Don't bother running these tests 56 // with double doubles until compiler support is better. 57 using RealTypes = 58 std::conditional<absl::numeric_internal::IsDoubleDouble(), 59 ::testing::Types<float, double>, 60 ::testing::Types<float, double, long double>>::type; 61 TYPED_TEST_SUITE(ExponentialDistributionTypedTest, RealTypes); 62 63 TYPED_TEST(ExponentialDistributionTypedTest, SerializeTest) { 64 using param_type = 65 typename absl::exponential_distribution<TypeParam>::param_type; 66 67 const TypeParam kParams[] = { 68 // Cases around 1. 69 1, // 70 std::nextafter(TypeParam(1), TypeParam(0)), // 1 - epsilon 71 std::nextafter(TypeParam(1), TypeParam(2)), // 1 + epsilon 72 // Typical cases. 73 TypeParam(1e-8), TypeParam(1e-4), TypeParam(1), TypeParam(2), 74 TypeParam(1e4), TypeParam(1e8), TypeParam(1e20), TypeParam(2.5), 75 // Boundary cases. 76 std::numeric_limits<TypeParam>::max(), 77 std::numeric_limits<TypeParam>::epsilon(), 78 std::nextafter(std::numeric_limits<TypeParam>::min(), 79 TypeParam(1)), // min + epsilon 80 std::numeric_limits<TypeParam>::min(), // smallest normal 81 // There are some errors dealing with denorms on apple platforms. 82 std::numeric_limits<TypeParam>::denorm_min(), // smallest denorm 83 std::numeric_limits<TypeParam>::min() / 2, // denorm 84 std::nextafter(std::numeric_limits<TypeParam>::min(), 85 TypeParam(0)), // denorm_max 86 }; 87 88 constexpr int kCount = 1000; 89 absl::InsecureBitGen gen; 90 91 for (const TypeParam lambda : kParams) { 92 // Some values may be invalid; skip those. 93 if (!std::isfinite(lambda)) continue; 94 ABSL_ASSERT(lambda > 0); 95 96 const param_type param(lambda); 97 98 absl::exponential_distribution<TypeParam> before(lambda); 99 EXPECT_EQ(before.lambda(), param.lambda()); 100 101 { 102 absl::exponential_distribution<TypeParam> via_param(param); 103 EXPECT_EQ(via_param, before); 104 EXPECT_EQ(via_param.param(), before.param()); 105 } 106 107 // Smoke test. 108 auto sample_min = before.max(); 109 auto sample_max = before.min(); 110 for (int i = 0; i < kCount; i++) { 111 auto sample = before(gen); 112 EXPECT_GE(sample, before.min()) << before; 113 EXPECT_LE(sample, before.max()) << before; 114 if (sample > sample_max) sample_max = sample; 115 if (sample < sample_min) sample_min = sample; 116 } 117 if (!std::is_same<TypeParam, long double>::value) { 118 LOG(INFO) << "Range {" << lambda << "}: " << sample_min << ", " 119 << sample_max << ", lambda=" << lambda; 120 } 121 122 std::stringstream ss; 123 ss << before; 124 125 if (!std::isfinite(lambda)) { 126 // Streams do not deserialize inf/nan correctly. 127 continue; 128 } 129 // Validate stream serialization. 130 absl::exponential_distribution<TypeParam> after(34.56f); 131 132 EXPECT_NE(before.lambda(), after.lambda()); 133 EXPECT_NE(before.param(), after.param()); 134 EXPECT_NE(before, after); 135 136 ss >> after; 137 138 EXPECT_EQ(before.lambda(), after.lambda()) // 139 << ss.str() << " " // 140 << (ss.good() ? "good " : "") // 141 << (ss.bad() ? "bad " : "") // 142 << (ss.eof() ? "eof " : "") // 143 << (ss.fail() ? "fail " : ""); 144 } 145 } 146 147 // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3667.htm 148 149 class ExponentialModel { 150 public: 151 explicit ExponentialModel(double lambda) 152 : lambda_(lambda), beta_(1.0 / lambda) {} 153 154 double lambda() const { return lambda_; } 155 156 double mean() const { return beta_; } 157 double variance() const { return beta_ * beta_; } 158 double stddev() const { return std::sqrt(variance()); } 159 double skew() const { return 2; } 160 double kurtosis() const { return 6.0; } 161 162 double CDF(double x) { return 1.0 - std::exp(-lambda_ * x); } 163 164 // The inverse CDF, or PercentPoint function of the distribution 165 double InverseCDF(double p) { 166 ABSL_ASSERT(p >= 0.0); 167 ABSL_ASSERT(p < 1.0); 168 return -beta_ * std::log(1.0 - p); 169 } 170 171 private: 172 const double lambda_; 173 const double beta_; 174 }; 175 176 struct Param { 177 double lambda; 178 double p_fail; 179 int trials; 180 }; 181 182 class ExponentialDistributionTests : public testing::TestWithParam<Param>, 183 public ExponentialModel { 184 public: 185 ExponentialDistributionTests() : ExponentialModel(GetParam().lambda) {} 186 187 // SingleZTest provides a basic z-squared test of the mean vs. expected 188 // mean for data generated by the poisson distribution. 189 template <typename D> 190 bool SingleZTest(const double p, const size_t samples); 191 192 // SingleChiSquaredTest provides a basic chi-squared test of the normal 193 // distribution. 194 template <typename D> 195 double SingleChiSquaredTest(); 196 197 // We use a fixed bit generator for distribution accuracy tests. This allows 198 // these tests to be deterministic, while still testing the qualify of the 199 // implementation. 200 absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; 201 }; 202 203 template <typename D> 204 bool ExponentialDistributionTests::SingleZTest(const double p, 205 const size_t samples) { 206 D dis(lambda()); 207 208 std::vector<double> data; 209 data.reserve(samples); 210 for (size_t i = 0; i < samples; i++) { 211 const double x = dis(rng_); 212 data.push_back(x); 213 } 214 215 const auto m = absl::random_internal::ComputeDistributionMoments(data); 216 const double max_err = absl::random_internal::MaxErrorTolerance(p); 217 const double z = absl::random_internal::ZScore(mean(), m); 218 const bool pass = absl::random_internal::Near("z", z, 0.0, max_err); 219 220 if (!pass) { 221 // clang-format off 222 LOG(INFO) 223 << "p=" << p << " max_err=" << max_err << "\n" 224 " lambda=" << lambda() << "\n" 225 " mean=" << m.mean << " vs. " << mean() << "\n" 226 " stddev=" << std::sqrt(m.variance) << " vs. " << stddev() << "\n" 227 " skewness=" << m.skewness << " vs. " << skew() << "\n" 228 " kurtosis=" << m.kurtosis << " vs. " << kurtosis() << "\n" 229 " z=" << z << " vs. 0"; 230 // clang-format on 231 } 232 return pass; 233 } 234 235 template <typename D> 236 double ExponentialDistributionTests::SingleChiSquaredTest() { 237 const size_t kSamples = 10000; 238 const int kBuckets = 50; 239 240 // The InverseCDF is the percent point function of the distribution, and can 241 // be used to assign buckets roughly uniformly. 242 std::vector<double> cutoffs; 243 const double kInc = 1.0 / static_cast<double>(kBuckets); 244 for (double p = kInc; p < 1.0; p += kInc) { 245 cutoffs.push_back(InverseCDF(p)); 246 } 247 if (cutoffs.back() != std::numeric_limits<double>::infinity()) { 248 cutoffs.push_back(std::numeric_limits<double>::infinity()); 249 } 250 251 D dis(lambda()); 252 253 std::vector<int32_t> counts(cutoffs.size(), 0); 254 for (int j = 0; j < kSamples; j++) { 255 const double x = dis(rng_); 256 auto it = std::upper_bound(cutoffs.begin(), cutoffs.end(), x); 257 counts[std::distance(cutoffs.begin(), it)]++; 258 } 259 260 // Null-hypothesis is that the distribution is exponentially distributed 261 // with the provided lambda (not estimated from the data). 262 const int dof = static_cast<int>(counts.size()) - 1; 263 264 // Our threshold for logging is 1-in-50. 265 const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98); 266 267 const double expected = 268 static_cast<double>(kSamples) / static_cast<double>(counts.size()); 269 270 double chi_square = absl::random_internal::ChiSquareWithExpected( 271 std::begin(counts), std::end(counts), expected); 272 double p = absl::random_internal::ChiSquarePValue(chi_square, dof); 273 274 if (chi_square > threshold) { 275 for (size_t i = 0; i < cutoffs.size(); i++) { 276 LOG(INFO) << i << " : (" << cutoffs[i] << ") = " << counts[i]; 277 } 278 279 // clang-format off 280 LOG(INFO) << "lambda " << lambda() << "\n" 281 " expected " << expected << "\n" 282 << kChiSquared << " " << chi_square << " (" << p << ")\n" 283 << kChiSquared << " @ 0.98 = " << threshold; 284 // clang-format on 285 } 286 return p; 287 } 288 289 TEST_P(ExponentialDistributionTests, ZTest) { 290 const size_t kSamples = 10000; 291 const auto& param = GetParam(); 292 const int expected_failures = 293 std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail))); 294 const double p = absl::random_internal::RequiredSuccessProbability( 295 param.p_fail, param.trials); 296 297 int failures = 0; 298 for (int i = 0; i < param.trials; i++) { 299 failures += SingleZTest<absl::exponential_distribution<double>>(p, kSamples) 300 ? 0 301 : 1; 302 } 303 EXPECT_LE(failures, expected_failures); 304 } 305 306 TEST_P(ExponentialDistributionTests, ChiSquaredTest) { 307 const int kTrials = 20; 308 int failures = 0; 309 310 for (int i = 0; i < kTrials; i++) { 311 double p_value = 312 SingleChiSquaredTest<absl::exponential_distribution<double>>(); 313 if (p_value < 0.005) { // 1/200 314 failures++; 315 } 316 } 317 318 // There is a 0.10% chance of producing at least one failure, so raise the 319 // failure threshold high enough to allow for a flake rate < 10,000. 320 EXPECT_LE(failures, 4); 321 } 322 323 std::vector<Param> GenParams() { 324 return { 325 Param{1.0, 0.02, 100}, 326 Param{2.5, 0.02, 100}, 327 Param{10, 0.02, 100}, 328 // large 329 Param{1e4, 0.02, 100}, 330 Param{1e9, 0.02, 100}, 331 // small 332 Param{0.1, 0.02, 100}, 333 Param{1e-3, 0.02, 100}, 334 Param{1e-5, 0.02, 100}, 335 }; 336 } 337 338 std::string ParamName(const ::testing::TestParamInfo<Param>& info) { 339 const auto& p = info.param; 340 std::string name = absl::StrCat("lambda_", absl::SixDigits(p.lambda)); 341 return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}}); 342 } 343 344 INSTANTIATE_TEST_SUITE_P(All, ExponentialDistributionTests, 345 ::testing::ValuesIn(GenParams()), ParamName); 346 347 // NOTE: absl::exponential_distribution is not guaranteed to be stable. 348 TEST(ExponentialDistributionTest, StabilityTest) { 349 // absl::exponential_distribution stability relies on std::log1p and 350 // absl::uniform_real_distribution. 351 absl::random_internal::sequence_urbg urbg( 352 {0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull, 353 0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull, 354 0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull, 355 0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull}); 356 357 std::vector<int> output(14); 358 359 { 360 absl::exponential_distribution<double> dist; 361 std::generate(std::begin(output), std::end(output), 362 [&] { return static_cast<int>(10000.0 * dist(urbg)); }); 363 364 EXPECT_EQ(14, urbg.invocations()); 365 EXPECT_THAT(output, 366 testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936, 367 804, 126, 12337, 17984, 27002, 0, 71913)); 368 } 369 370 urbg.reset(); 371 { 372 absl::exponential_distribution<float> dist; 373 std::generate(std::begin(output), std::end(output), 374 [&] { return static_cast<int>(10000.0f * dist(urbg)); }); 375 376 EXPECT_EQ(14, urbg.invocations()); 377 EXPECT_THAT(output, 378 testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936, 379 804, 126, 12337, 17984, 27002, 0, 71913)); 380 } 381 } 382 383 TEST(ExponentialDistributionTest, AlgorithmBounds) { 384 // Relies on absl::uniform_real_distribution, so some of these comments 385 // reference that. 386 387 #if (defined(__i386__) || defined(_M_IX86)) && FLT_EVAL_METHOD != 0 388 // We're using an x87-compatible FPU, and intermediate operations can be 389 // performed with 80-bit floats. This produces slightly different results from 390 // what we expect below. 391 GTEST_SKIP() 392 << "Skipping the test because we detected x87 floating-point semantics"; 393 #endif 394 395 absl::exponential_distribution<double> dist; 396 397 { 398 // This returns the smallest value >0 from absl::uniform_real_distribution. 399 absl::random_internal::sequence_urbg urbg({0x0000000000000001ull}); 400 double a = dist(urbg); 401 EXPECT_EQ(a, 5.42101086242752217004e-20); 402 } 403 404 { 405 // This returns a value very near 0.5 from absl::uniform_real_distribution. 406 absl::random_internal::sequence_urbg urbg({0x7fffffffffffffefull}); 407 double a = dist(urbg); 408 EXPECT_EQ(a, 0.693147180559945175204); 409 } 410 411 { 412 // This returns the largest value <1 from absl::uniform_real_distribution. 413 // WolframAlpha: ~39.1439465808987766283058547296341915292187253 414 absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFeFull}); 415 double a = dist(urbg); 416 EXPECT_EQ(a, 36.7368005696771007251); 417 } 418 { 419 // This *ALSO* returns the largest value <1. 420 absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFFFull}); 421 double a = dist(urbg); 422 EXPECT_EQ(a, 36.7368005696771007251); 423 } 424 } 425 426 } // namespace