tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

stats.h (5488B)


      1 // Copyright 2024 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //     https://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef HIGHWAY_HWY_STATS_H_
     17 #define HIGHWAY_HWY_STATS_H_
     18 
     19 #include <stdint.h>
     20 #include <stdio.h>
     21 
     22 #include <cmath>
     23 #include <string>
     24 
     25 #include "hwy/base.h"  // HWY_ASSERT
     26 
     27 namespace hwy {
     28 
     29 // Thread-compatible.
     30 template <size_t N>
     31 class Bins {
     32 public:
     33  Bins() { Reset(); }
     34 
     35  template <typename T>
     36  void Notify(T bin) {
     37    HWY_ASSERT(T{0} <= bin && bin < static_cast<T>(N));
     38    counts_[static_cast<int32_t>(bin)]++;
     39  }
     40 
     41  void Assimilate(const Bins<N>& other) {
     42    for (size_t i = 0; i < N; ++i) {
     43      counts_[i] += other.counts_[i];
     44    }
     45  }
     46 
     47  void Print(const char* caption) const {
     48    fprintf(stderr, "\n%s [%zu]\n", caption, N);
     49    size_t last_nonzero = 0;
     50    for (size_t i = N - 1; i < N; --i) {
     51      if (counts_[i] != 0) {
     52        last_nonzero = i;
     53        break;
     54      }
     55    }
     56    for (size_t i = 0; i <= last_nonzero; ++i) {
     57      fprintf(stderr, "  %zu\n", counts_[i]);
     58    }
     59  }
     60 
     61  void Reset() {
     62    for (size_t i = 0; i < N; ++i) {
     63      counts_[i] = 0;
     64    }
     65  }
     66 
     67 private:
     68  size_t counts_[N];
     69 };
     70 
     71 // Descriptive statistics of a variable (4 moments). Thread-compatible.
     72 class Stats {
     73 public:
     74  Stats() { Reset(); }
     75 
     76  void Notify(const float x) {
     77    ++n_;
     78 
     79    min_ = HWY_MIN(min_, x);
     80    max_ = HWY_MAX(max_, x);
     81 
     82    // Logarithmic transform avoids/delays underflow and overflow.
     83    sum_log_ += std::log(static_cast<double>(x));
     84 
     85    // Online moments.
     86 // Reference: https://www.thinkbrg.com/media/publication/720_McCrary_ImplementingAlgorithms_Whitepaper_20151119_WEB.pdf
     87    const double d = x - m1_;
     88    const double d_div_n = d / static_cast<double>(n_);
     89    const double d2n1_div_n = d * (static_cast<double>(n_) - 1) * d_div_n;
     90    const int64_t n_poly = n_ * n_ - 3 * n_ + 3;
     91    m1_ += d_div_n;
     92    m4_ += d_div_n *
     93           (d_div_n * (d2n1_div_n * static_cast<double>(n_poly) + 6.0 * m2_) -
     94            4.0 * m3_);
     95    m3_ += d_div_n * (d2n1_div_n * (static_cast<double>(n_) - 2) - 3.0 * m2_);
     96    m2_ += d2n1_div_n;
     97  }
     98 
     99  void Assimilate(const Stats& other);
    100 
    101  int64_t Count() const { return n_; }
    102 
    103  float Min() const { return min_; }
    104  float Max() const { return max_; }
    105 
    106  double GeometricMean() const {
    107    return n_ == 0 ? 0.0 : std::exp(sum_log_ / static_cast<double>(n_));
    108  }
    109 
    110  double Mean() const { return m1_; }
    111  // Same as Mu2. Assumes n_ is large.
    112  double SampleVariance() const {
    113    return n_ == 0 ? 0.0 : m2_ / static_cast<int>(n_);
    114  }
    115  // Unbiased estimator for population variance even for smaller n_.
    116  double Variance() const {
    117    if (n_ == 0) return 0.0;
    118    if (n_ == 1) return m2_;
    119    return m2_ / static_cast<int>(n_ - 1);
    120  }
    121  double StandardDeviation() const { return std::sqrt(Variance()); }
    122  // Near zero for normal distributions; if positive on a unimodal distribution,
    123  // the right tail is fatter. Assumes n_ is large.
    124  double SampleSkewness() const {
    125    if (ScalarAbs(m2_) < 1E-7) return 0.0;
    126    return m3_ * std::sqrt(static_cast<double>(n_)) / std::pow(m2_, 1.5);
    127  }
    128  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
    129  double Skewness() const {
    130    if (n_ == 0) return 0.0;
    131    const double biased = SampleSkewness();
    132    const double r = (static_cast<double>(n_) - 1.0) / static_cast<double>(n_);
    133    return biased * std::pow(r, 1.5);
    134  }
    135  // Near zero for normal distributions; smaller values indicate fewer/smaller
    136  // outliers and larger indicates more/larger outliers. Assumes n_ is large.
    137  double SampleKurtosis() const {
    138    if (ScalarAbs(m2_) < 1E-7) return 0.0;
    139    return m4_ * static_cast<double>(n_) / (m2_ * m2_);
    140  }
    141  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
    142  double Kurtosis() const {
    143    if (n_ == 0) return 0.0;
    144    const double biased = SampleKurtosis();
    145    const double r = (static_cast<double>(n_) - 1.0) / static_cast<double>(n_);
    146    return biased * r * r;
    147  }
    148 
    149  // Central moments, useful for "method of moments"-based parameter estimation
    150  // of a mixture of two Gaussians. Assumes Count() != 0.
    151  double Mu1() const { return m1_; }
    152  double Mu2() const { return m2_ / static_cast<int>(n_); }
    153  double Mu3() const { return m3_ / static_cast<int>(n_); }
    154  double Mu4() const { return m4_ / static_cast<int>(n_); }
    155 
    156  // Which statistics to EXCLUDE in ToString
    157  enum {
    158    kNoCount = 1,
    159    kNoMeanSD = 2,
    160    kNoMinMax = 4,
    161    kNoSkewKurt = 8,
    162    kNoGeomean = 16
    163  };
    164  std::string ToString(int exclude = 0) const;
    165 
    166  void Reset() {
    167    n_ = 0;
    168 
    169    min_ = hwy::HighestValue<float>();
    170    max_ = hwy::LowestValue<float>();
    171 
    172    sum_log_ = 0.0;
    173 
    174    m1_ = 0.0;
    175    m2_ = 0.0;
    176    m3_ = 0.0;
    177    m4_ = 0.0;
    178  }
    179 
    180 private:
    181  int64_t n_;  // signed for faster conversion + safe subtraction
    182 
    183  float min_;
    184  float max_;
    185 
    186  double sum_log_;  // for geomean
    187 
    188  // Moments
    189  double m1_;
    190  double m2_;
    191  double m3_;
    192  double m4_;
    193 };
    194 
    195 }  // namespace hwy
    196 
    197 #endif  // HIGHWAY_HWY_STATS_H_