tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

exponential_distribution_test.cc (14588B)


      1 // Copyright 2017 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "absl/random/exponential_distribution.h"
     16 
     17 #include <algorithm>
     18 #include <cfloat>
     19 #include <cmath>
     20 #include <cstddef>
     21 #include <cstdint>
     22 #include <iterator>
     23 #include <limits>
     24 #include <random>
     25 #include <sstream>
     26 #include <string>
     27 #include <type_traits>
     28 #include <vector>
     29 
     30 #include "gmock/gmock.h"
     31 #include "gtest/gtest.h"
     32 #include "absl/base/macros.h"
     33 #include "absl/log/log.h"
     34 #include "absl/numeric/internal/representation.h"
     35 #include "absl/random/internal/chi_square.h"
     36 #include "absl/random/internal/distribution_test_util.h"
     37 #include "absl/random/internal/pcg_engine.h"
     38 #include "absl/random/internal/sequence_urbg.h"
     39 #include "absl/random/random.h"
     40 #include "absl/strings/str_cat.h"
     41 #include "absl/strings/str_format.h"
     42 #include "absl/strings/str_replace.h"
     43 #include "absl/strings/strip.h"
     44 
     45 namespace {
     46 
     47 using absl::random_internal::kChiSquared;
     48 
     49 template <typename RealType>
     50 class ExponentialDistributionTypedTest : public ::testing::Test {};
     51 
     52 // double-double arithmetic is not supported well by either GCC or Clang; see
     53 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99048,
     54 // https://bugs.llvm.org/show_bug.cgi?id=49131, and
     55 // https://bugs.llvm.org/show_bug.cgi?id=49132. Don't bother running these tests
     56 // with double doubles until compiler support is better.
     57 using RealTypes =
     58    std::conditional<absl::numeric_internal::IsDoubleDouble(),
     59                     ::testing::Types<float, double>,
     60                     ::testing::Types<float, double, long double>>::type;
     61 TYPED_TEST_SUITE(ExponentialDistributionTypedTest, RealTypes);
     62 
     63 TYPED_TEST(ExponentialDistributionTypedTest, SerializeTest) {
     64  using param_type =
     65      typename absl::exponential_distribution<TypeParam>::param_type;
     66 
     67  const TypeParam kParams[] = {
     68      // Cases around 1.
     69      1,                                           //
     70      std::nextafter(TypeParam(1), TypeParam(0)),  // 1 - epsilon
     71      std::nextafter(TypeParam(1), TypeParam(2)),  // 1 + epsilon
     72      // Typical cases.
     73      TypeParam(1e-8), TypeParam(1e-4), TypeParam(1), TypeParam(2),
     74      TypeParam(1e4), TypeParam(1e8), TypeParam(1e20), TypeParam(2.5),
     75      // Boundary cases.
     76      std::numeric_limits<TypeParam>::max(),
     77      std::numeric_limits<TypeParam>::epsilon(),
     78      std::nextafter(std::numeric_limits<TypeParam>::min(),
     79                     TypeParam(1)),           // min + epsilon
     80      std::numeric_limits<TypeParam>::min(),  // smallest normal
     81      // There are some errors dealing with denorms on apple platforms.
     82      std::numeric_limits<TypeParam>::denorm_min(),  // smallest denorm
     83      std::numeric_limits<TypeParam>::min() / 2,     // denorm
     84      std::nextafter(std::numeric_limits<TypeParam>::min(),
     85                     TypeParam(0)),  // denorm_max
     86  };
     87 
     88  constexpr int kCount = 1000;
     89  absl::InsecureBitGen gen;
     90 
     91  for (const TypeParam lambda : kParams) {
     92    // Some values may be invalid; skip those.
     93    if (!std::isfinite(lambda)) continue;
     94    ABSL_ASSERT(lambda > 0);
     95 
     96    const param_type param(lambda);
     97 
     98    absl::exponential_distribution<TypeParam> before(lambda);
     99    EXPECT_EQ(before.lambda(), param.lambda());
    100 
    101    {
    102      absl::exponential_distribution<TypeParam> via_param(param);
    103      EXPECT_EQ(via_param, before);
    104      EXPECT_EQ(via_param.param(), before.param());
    105    }
    106 
    107    // Smoke test.
    108    auto sample_min = before.max();
    109    auto sample_max = before.min();
    110    for (int i = 0; i < kCount; i++) {
    111      auto sample = before(gen);
    112      EXPECT_GE(sample, before.min()) << before;
    113      EXPECT_LE(sample, before.max()) << before;
    114      if (sample > sample_max) sample_max = sample;
    115      if (sample < sample_min) sample_min = sample;
    116    }
    117    if (!std::is_same<TypeParam, long double>::value) {
    118      LOG(INFO) << "Range {" << lambda << "}: " << sample_min << ", "
    119                << sample_max << ", lambda=" << lambda;
    120    }
    121 
    122    std::stringstream ss;
    123    ss << before;
    124 
    125    if (!std::isfinite(lambda)) {
    126      // Streams do not deserialize inf/nan correctly.
    127      continue;
    128    }
    129    // Validate stream serialization.
    130    absl::exponential_distribution<TypeParam> after(34.56f);
    131 
    132    EXPECT_NE(before.lambda(), after.lambda());
    133    EXPECT_NE(before.param(), after.param());
    134    EXPECT_NE(before, after);
    135 
    136    ss >> after;
    137 
    138    EXPECT_EQ(before.lambda(), after.lambda())  //
    139        << ss.str() << " "                      //
    140        << (ss.good() ? "good " : "")           //
    141        << (ss.bad() ? "bad " : "")             //
    142        << (ss.eof() ? "eof " : "")             //
    143        << (ss.fail() ? "fail " : "");
    144  }
    145 }
    146 
    147 // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3667.htm
    148 
    149 class ExponentialModel {
    150 public:
    151  explicit ExponentialModel(double lambda)
    152      : lambda_(lambda), beta_(1.0 / lambda) {}
    153 
    154  double lambda() const { return lambda_; }
    155 
    156  double mean() const { return beta_; }
    157  double variance() const { return beta_ * beta_; }
    158  double stddev() const { return std::sqrt(variance()); }
    159  double skew() const { return 2; }
    160  double kurtosis() const { return 6.0; }
    161 
    162  double CDF(double x) { return 1.0 - std::exp(-lambda_ * x); }
    163 
    164  // The inverse CDF, or PercentPoint function of the distribution
    165  double InverseCDF(double p) {
    166    ABSL_ASSERT(p >= 0.0);
    167    ABSL_ASSERT(p < 1.0);
    168    return -beta_ * std::log(1.0 - p);
    169  }
    170 
    171 private:
    172  const double lambda_;
    173  const double beta_;
    174 };
    175 
    176 struct Param {
    177  double lambda;
    178  double p_fail;
    179  int trials;
    180 };
    181 
    182 class ExponentialDistributionTests : public testing::TestWithParam<Param>,
    183                                     public ExponentialModel {
    184 public:
    185  ExponentialDistributionTests() : ExponentialModel(GetParam().lambda) {}
    186 
    187  // SingleZTest provides a basic z-squared test of the mean vs. expected
    188  // mean for data generated by the poisson distribution.
    189  template <typename D>
    190  bool SingleZTest(const double p, const size_t samples);
    191 
    192  // SingleChiSquaredTest provides a basic chi-squared test of the normal
    193  // distribution.
    194  template <typename D>
    195  double SingleChiSquaredTest();
    196 
    197  // We use a fixed bit generator for distribution accuracy tests.  This allows
    198  // these tests to be deterministic, while still testing the qualify of the
    199  // implementation.
    200  absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6};
    201 };
    202 
    203 template <typename D>
    204 bool ExponentialDistributionTests::SingleZTest(const double p,
    205                                               const size_t samples) {
    206  D dis(lambda());
    207 
    208  std::vector<double> data;
    209  data.reserve(samples);
    210  for (size_t i = 0; i < samples; i++) {
    211    const double x = dis(rng_);
    212    data.push_back(x);
    213  }
    214 
    215  const auto m = absl::random_internal::ComputeDistributionMoments(data);
    216  const double max_err = absl::random_internal::MaxErrorTolerance(p);
    217  const double z = absl::random_internal::ZScore(mean(), m);
    218  const bool pass = absl::random_internal::Near("z", z, 0.0, max_err);
    219 
    220  if (!pass) {
    221    // clang-format off
    222    LOG(INFO)
    223        << "p=" << p << " max_err=" << max_err << "\n"
    224           " lambda=" << lambda() << "\n"
    225           " mean=" << m.mean << " vs. " << mean() << "\n"
    226           " stddev=" << std::sqrt(m.variance) << " vs. " << stddev() << "\n"
    227           " skewness=" << m.skewness << " vs. " << skew() << "\n"
    228           " kurtosis=" << m.kurtosis << " vs. " << kurtosis() << "\n"
    229           " z=" << z << " vs. 0";
    230    // clang-format on
    231  }
    232  return pass;
    233 }
    234 
    235 template <typename D>
    236 double ExponentialDistributionTests::SingleChiSquaredTest() {
    237  const size_t kSamples = 10000;
    238  const int kBuckets = 50;
    239 
    240  // The InverseCDF is the percent point function of the distribution, and can
    241  // be used to assign buckets roughly uniformly.
    242  std::vector<double> cutoffs;
    243  const double kInc = 1.0 / static_cast<double>(kBuckets);
    244  for (double p = kInc; p < 1.0; p += kInc) {
    245    cutoffs.push_back(InverseCDF(p));
    246  }
    247  if (cutoffs.back() != std::numeric_limits<double>::infinity()) {
    248    cutoffs.push_back(std::numeric_limits<double>::infinity());
    249  }
    250 
    251  D dis(lambda());
    252 
    253  std::vector<int32_t> counts(cutoffs.size(), 0);
    254  for (int j = 0; j < kSamples; j++) {
    255    const double x = dis(rng_);
    256    auto it = std::upper_bound(cutoffs.begin(), cutoffs.end(), x);
    257    counts[std::distance(cutoffs.begin(), it)]++;
    258  }
    259 
    260  // Null-hypothesis is that the distribution is exponentially distributed
    261  // with the provided lambda (not estimated from the data).
    262  const int dof = static_cast<int>(counts.size()) - 1;
    263 
    264  // Our threshold for logging is 1-in-50.
    265  const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98);
    266 
    267  const double expected =
    268      static_cast<double>(kSamples) / static_cast<double>(counts.size());
    269 
    270  double chi_square = absl::random_internal::ChiSquareWithExpected(
    271      std::begin(counts), std::end(counts), expected);
    272  double p = absl::random_internal::ChiSquarePValue(chi_square, dof);
    273 
    274  if (chi_square > threshold) {
    275    for (size_t i = 0; i < cutoffs.size(); i++) {
    276      LOG(INFO) << i << " : (" << cutoffs[i] << ") = " << counts[i];
    277    }
    278 
    279    // clang-format off
    280    LOG(INFO) << "lambda " << lambda() << "\n"
    281                 " expected " << expected << "\n"
    282              << kChiSquared << " " << chi_square << " (" << p << ")\n"
    283              << kChiSquared << " @ 0.98 = " << threshold;
    284    // clang-format on
    285  }
    286  return p;
    287 }
    288 
    289 TEST_P(ExponentialDistributionTests, ZTest) {
    290  const size_t kSamples = 10000;
    291  const auto& param = GetParam();
    292  const int expected_failures =
    293      std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail)));
    294  const double p = absl::random_internal::RequiredSuccessProbability(
    295      param.p_fail, param.trials);
    296 
    297  int failures = 0;
    298  for (int i = 0; i < param.trials; i++) {
    299    failures += SingleZTest<absl::exponential_distribution<double>>(p, kSamples)
    300                    ? 0
    301                    : 1;
    302  }
    303  EXPECT_LE(failures, expected_failures);
    304 }
    305 
    306 TEST_P(ExponentialDistributionTests, ChiSquaredTest) {
    307  const int kTrials = 20;
    308  int failures = 0;
    309 
    310  for (int i = 0; i < kTrials; i++) {
    311    double p_value =
    312        SingleChiSquaredTest<absl::exponential_distribution<double>>();
    313    if (p_value < 0.005) {  // 1/200
    314      failures++;
    315    }
    316  }
    317 
    318  // There is a 0.10% chance of producing at least one failure, so raise the
    319  // failure threshold high enough to allow for a flake rate < 10,000.
    320  EXPECT_LE(failures, 4);
    321 }
    322 
    323 std::vector<Param> GenParams() {
    324  return {
    325      Param{1.0, 0.02, 100},
    326      Param{2.5, 0.02, 100},
    327      Param{10, 0.02, 100},
    328      // large
    329      Param{1e4, 0.02, 100},
    330      Param{1e9, 0.02, 100},
    331      // small
    332      Param{0.1, 0.02, 100},
    333      Param{1e-3, 0.02, 100},
    334      Param{1e-5, 0.02, 100},
    335  };
    336 }
    337 
    338 std::string ParamName(const ::testing::TestParamInfo<Param>& info) {
    339  const auto& p = info.param;
    340  std::string name = absl::StrCat("lambda_", absl::SixDigits(p.lambda));
    341  return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}});
    342 }
    343 
    344 INSTANTIATE_TEST_SUITE_P(All, ExponentialDistributionTests,
    345                         ::testing::ValuesIn(GenParams()), ParamName);
    346 
    347 // NOTE: absl::exponential_distribution is not guaranteed to be stable.
    348 TEST(ExponentialDistributionTest, StabilityTest) {
    349  // absl::exponential_distribution stability relies on std::log1p and
    350  // absl::uniform_real_distribution.
    351  absl::random_internal::sequence_urbg urbg(
    352      {0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull,
    353       0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull,
    354       0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull,
    355       0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull});
    356 
    357  std::vector<int> output(14);
    358 
    359  {
    360    absl::exponential_distribution<double> dist;
    361    std::generate(std::begin(output), std::end(output),
    362                  [&] { return static_cast<int>(10000.0 * dist(urbg)); });
    363 
    364    EXPECT_EQ(14, urbg.invocations());
    365    EXPECT_THAT(output,
    366                testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936,
    367                                     804, 126, 12337, 17984, 27002, 0, 71913));
    368  }
    369 
    370  urbg.reset();
    371  {
    372    absl::exponential_distribution<float> dist;
    373    std::generate(std::begin(output), std::end(output),
    374                  [&] { return static_cast<int>(10000.0f * dist(urbg)); });
    375 
    376    EXPECT_EQ(14, urbg.invocations());
    377    EXPECT_THAT(output,
    378                testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936,
    379                                     804, 126, 12337, 17984, 27002, 0, 71913));
    380  }
    381 }
    382 
    383 TEST(ExponentialDistributionTest, AlgorithmBounds) {
    384  // Relies on absl::uniform_real_distribution, so some of these comments
    385  // reference that.
    386 
    387 #if (defined(__i386__) || defined(_M_IX86)) && FLT_EVAL_METHOD != 0
    388  // We're using an x87-compatible FPU, and intermediate operations can be
    389  // performed with 80-bit floats. This produces slightly different results from
    390  // what we expect below.
    391  GTEST_SKIP()
    392      << "Skipping the test because we detected x87 floating-point semantics";
    393 #endif
    394 
    395  absl::exponential_distribution<double> dist;
    396 
    397  {
    398    // This returns the smallest value >0 from absl::uniform_real_distribution.
    399    absl::random_internal::sequence_urbg urbg({0x0000000000000001ull});
    400    double a = dist(urbg);
    401    EXPECT_EQ(a, 5.42101086242752217004e-20);
    402  }
    403 
    404  {
    405    // This returns a value very near 0.5 from absl::uniform_real_distribution.
    406    absl::random_internal::sequence_urbg urbg({0x7fffffffffffffefull});
    407    double a = dist(urbg);
    408    EXPECT_EQ(a, 0.693147180559945175204);
    409  }
    410 
    411  {
    412    // This returns the largest value <1 from absl::uniform_real_distribution.
    413    // WolframAlpha: ~39.1439465808987766283058547296341915292187253
    414    absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFeFull});
    415    double a = dist(urbg);
    416    EXPECT_EQ(a, 36.7368005696771007251);
    417  }
    418  {
    419    // This *ALSO* returns the largest value <1.
    420    absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFFFull});
    421    double a = dist(urbg);
    422    EXPECT_EQ(a, 36.7368005696771007251);
    423  }
    424 }
    425 
    426 }  // namespace