tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

histogram.h (16270B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      4 // Use of this source code is governed by a BSD-style license that can be
      5 // found in the LICENSE file.
      6 
      7 // Histogram is an object that aggregates statistics, and can summarize them in
      8 // various forms, including ASCII graphical, HTML, and numerically (as a
      9 // vector of numbers corresponding to each of the aggregating buckets).
     10 
     11 // It supports calls to accumulate either time intervals (which are processed
     12 // as integral number of milliseconds), or arbitrary integral units.
     13 
     14 // The default layout of buckets is exponential.  For example, buckets might
     15 // contain (sequentially) the count of values in the following intervals:
     16 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
     17 // That bucket allocation would actually result from construction of a histogram
     18 // for values between 1 and 64, with 8 buckets, such as:
     19 // Histogram count(L"some name", 1, 64, 8);
     20 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
     21 // are not counted by the constructor in the user supplied "bucket_count"
     22 // argument.
     23 // The above example has an exponential ratio of 2 (doubling the bucket width
     24 // in each consecutive bucket.  The Histogram class automatically calculates
     25 // the smallest ratio that it can use to construct the number of buckets
     26 // selected in the constructor.  An another example, if you had 50 buckets,
     27 // and millisecond time values from 1 to 10000, then the ratio between
     28 // consecutive bucket widths will be approximately somewhere around the 50th
     29 // root of 10000.  This approach provides very fine grain (narrow) buckets
     30 // at the low end of the histogram scale, but allows the histogram to cover a
     31 // gigantic range with the addition of very few buckets.
     32 
     33 // Histograms use a pattern involving a function static variable, that is a
     34 // pointer to a histogram.  This static is explicitly initialized on any thread
     35 // that detects a uninitialized (NULL) pointer.  The potentially racy
     36 // initialization is not a problem as it is always set to point to the same
     37 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
     38 // is also completely thread safe, which results in a completely thread safe,
     39 // and relatively fast, set of counters.  To avoid races at shutdown, the static
     40 // pointer is NOT deleted, and we leak the histograms at process termination.
     41 
     42 #ifndef BASE_METRICS_HISTOGRAM_H_
     43 #define BASE_METRICS_HISTOGRAM_H_
     44 #pragma once
     45 
     46 #include "mozilla/MemoryReporting.h"
     47 
     48 #include <map>
     49 #include <string>
     50 
     51 #include "base/time.h"
     52 
     53 #include "nsTArray.h"
     54 
     55 namespace base {
     56 
     57 //------------------------------------------------------------------------------
     58 
     59 class BooleanHistogram;
     60 class CustomHistogram;
     61 class Histogram;
     62 class LinearHistogram;
     63 
     64 class Histogram {
     65 public:
     66  typedef int Sample;  // Used for samples (and ranges of samples).
     67  typedef int Count;   // Used to count samples in a bucket.
     68  static const Sample kSampleType_MAX = INT_MAX;
     69  // Initialize maximum number of buckets in histograms as 16,384.
     70  static const size_t kBucketCount_MAX;
     71 
     72  typedef nsTArray<Count> Counts;
     73  typedef const Sample* Ranges;
     74 
     75  // These enums are used to facilitate deserialization of renderer histograms
     76  // into the browser.
     77  enum ClassType {
     78    HISTOGRAM,
     79    LINEAR_HISTOGRAM,
     80    BOOLEAN_HISTOGRAM,
     81    FLAG_HISTOGRAM,
     82    COUNT_HISTOGRAM,
     83    CUSTOM_HISTOGRAM,
     84    NOT_VALID_IN_RENDERER
     85  };
     86 
     87  enum BucketLayout { EXPONENTIAL, LINEAR, CUSTOM };
     88 
     89  enum Flags {
     90    kNoFlags = 0,
     91    kUmaTargetedHistogramFlag = 0x1,  // Histogram should be UMA uploaded.
     92 
     93    kHexRangePrintingFlag = 0x8000  // Fancy bucket-naming supported.
     94  };
     95 
     96  enum Inconsistencies {
     97    NO_INCONSISTENCIES = 0x0,
     98    RANGE_CHECKSUM_ERROR = 0x1,
     99    BUCKET_ORDER_ERROR = 0x2,
    100    COUNT_HIGH_ERROR = 0x4,
    101    COUNT_LOW_ERROR = 0x8,
    102 
    103    NEVER_EXCEEDED_VALUE = 0x10
    104  };
    105 
    106  struct DescriptionPair {
    107    Sample sample;
    108    const char* description;  // Null means end of a list of pairs.
    109  };
    110 
    111  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf);
    112 
    113  bool is_empty() const {
    114    return this->sample_.counts(0) == 0 && this->sample_.sum() == 0;
    115  }
    116 
    117  //----------------------------------------------------------------------------
    118  // Statistic values, developed over the life of the histogram.
    119 
    120  class SampleSet {
    121   public:
    122    explicit SampleSet();
    123    ~SampleSet();
    124    SampleSet(SampleSet&&) = default;
    125 
    126    // None of the methods in this class are thread-safe.  Callers
    127    // must deal with locking themselves.
    128 
    129    // Adjust size of counts_ for use with given histogram.
    130    void Resize(const Histogram& histogram);
    131 
    132    // Accessor for histogram to make routine additions.
    133    void Accumulate(Sample value, Count count, size_t index);
    134 
    135    // Arithmetic manipulation of corresponding elements of the set.
    136    void Add(const SampleSet& other);
    137 
    138    size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf);
    139 
    140    Count counts(size_t i) const { return counts_[i]; }
    141    Count TotalCount() const;
    142    int64_t sum() const { return sum_; }
    143    int64_t redundant_count() const { return redundant_count_; }
    144    size_t size() const { return counts_.Length(); }
    145    SampleSet Clone() const {
    146      SampleSet result;
    147      result.counts_ = counts_.Clone();
    148      result.sum_ = sum_;
    149      result.redundant_count_ = redundant_count_;
    150      return result;
    151    }
    152    void Clear() {
    153      sum_ = 0;
    154      redundant_count_ = 0;
    155      for (int& i : counts_) {
    156        i = 0;
    157      }
    158    }
    159 
    160   protected:
    161    // Actual histogram data is stored in buckets, showing the count of values
    162    // that fit into each bucket.
    163    Counts counts_;
    164 
    165    // Save simple stats locally.  Note that this MIGHT get done in base class
    166    // without shared memory at some point.
    167    int64_t sum_;  // sum of samples.
    168 
    169    // To help identify memory corruption, we reduntantly save the number of
    170    // samples we've accumulated into all of our buckets.  We can compare this
    171    // count to the sum of the counts in all buckets, and detect problems.  Note
    172    // that due to races in histogram accumulation (if a histogram is indeed
    173    // updated on several threads simultaneously), the tallies might mismatch,
    174    // and also the snapshotting code may asynchronously get a mismatch (though
    175    // generally either race based mismatch cause is VERY rare).
    176    int64_t redundant_count_;
    177  };
    178 
    179  //----------------------------------------------------------------------------
    180  // minimum should start from 1. 0 is invalid as a minimum. 0 is an implicit
    181  // default underflow bucket.
    182  static Histogram* FactoryGet(Sample minimum, Sample maximum,
    183                               size_t bucket_count, Flags flags,
    184                               const int* buckets);
    185 
    186  virtual ~Histogram();
    187 
    188  void Add(int value);
    189  void Subtract(int value);
    190 
    191  // This method is an interface, used only by BooleanHistogram.
    192  virtual void AddBoolean(bool value);
    193 
    194  // Accept a TimeDelta to increment.
    195  void AddTime(TimeDelta time) { Add(static_cast<int>(time.InMilliseconds())); }
    196 
    197  virtual void AddSampleSet(const SampleSet& sample);
    198 
    199  virtual void Clear();
    200 
    201  // This method is an interface, used only by LinearHistogram.
    202  virtual void SetRangeDescriptions(const DescriptionPair descriptions[]);
    203 
    204  // Support generic flagging of Histograms.
    205  // 0x1 Currently used to mark this histogram to be recorded by UMA..
    206  // 0x8000 means print ranges in hex.
    207  void SetFlags(Flags flags) { flags_ = static_cast<Flags>(flags_ | flags); }
    208  void ClearFlags(Flags flags) { flags_ = static_cast<Flags>(flags_ & ~flags); }
    209  int flags() const { return flags_; }
    210 
    211  // Check to see if bucket ranges, counts and tallies in the snapshot are
    212  // consistent with the bucket ranges and checksums in our histogram.  This can
    213  // produce a false-alarm if a race occurred in the reading of the data during
    214  // a SnapShot process, but should otherwise be false at all times (unless we
    215  // have memory over-writes, or DRAM failures).
    216  virtual Inconsistencies FindCorruption(const SampleSet& snapshot) const;
    217 
    218  //----------------------------------------------------------------------------
    219  // Accessors for factory constuction, serialization and testing.
    220  //----------------------------------------------------------------------------
    221  virtual ClassType histogram_type() const;
    222  Sample declared_min() const { return declared_min_; }
    223  Sample declared_max() const { return declared_max_; }
    224  virtual Sample ranges(size_t i) const;
    225  uint32_t range_checksum() const { return range_checksum_; }
    226  virtual size_t bucket_count() const;
    227 
    228  virtual SampleSet SnapshotSample() const;
    229 
    230  virtual bool HasConstructorArguments(Sample minimum, Sample maximum,
    231                                       size_t bucket_count);
    232 
    233  virtual bool HasConstructorTimeDeltaArguments(TimeDelta minimum,
    234                                                TimeDelta maximum,
    235                                                size_t bucket_count);
    236  // Return true iff the range_checksum_ matches current ranges_ vector.
    237  bool HasValidRangeChecksum() const;
    238 
    239 protected:
    240  Histogram(Sample minimum, Sample maximum, size_t bucket_count);
    241  Histogram(TimeDelta minimum, TimeDelta maximum, size_t bucket_count);
    242 
    243  // Initialize ranges_ mapping from raw data.
    244  void InitializeBucketRangeFromData(const int* buckets);
    245 
    246  // Method to override to skip the display of the i'th bucket if it's empty.
    247  virtual bool PrintEmptyBucket(size_t index) const;
    248 
    249  //----------------------------------------------------------------------------
    250  // Methods to override to create histogram with different bucket widths.
    251  //----------------------------------------------------------------------------
    252  // Find bucket to increment for sample value.
    253  virtual size_t BucketIndex(Sample value) const;
    254  // Get normalized size, relative to the ranges_[i].
    255  virtual double GetBucketSize(Count current, size_t i) const;
    256 
    257  // Recalculate range_checksum_.
    258  void ResetRangeChecksum();
    259 
    260  // Return a string description of what goes in a given bucket.
    261  // Most commonly this is the numeric value, but in derived classes it may
    262  // be a name (or string description) given to the bucket.
    263  virtual const std::string GetAsciiBucketRange(size_t it) const;
    264 
    265  //----------------------------------------------------------------------------
    266  // Methods to override to create thread safe histogram.
    267  //----------------------------------------------------------------------------
    268  // Update all our internal data, including histogram
    269  virtual void Accumulate(Sample value, Count count, size_t index);
    270 
    271  // Validate that ranges_ was created sensibly (top and bottom range
    272  // values relate properly to the declared_min_ and declared_max_)..
    273  bool ValidateBucketRanges() const;
    274 
    275  virtual uint32_t CalculateRangeChecksum() const;
    276 
    277  // Finally, provide the state that changes with the addition of each new
    278  // sample.
    279  SampleSet sample_;
    280 
    281 private:
    282  // Post constructor initialization.
    283  void Initialize();
    284 
    285  // Checksum function for accumulating range values into a checksum.
    286  static uint32_t Crc32(uint32_t sum, Sample range);
    287 
    288  //----------------------------------------------------------------------------
    289  // Helpers for emitting Ascii graphic.  Each method appends data to output.
    290 
    291  // Find out how large the (graphically) the largest bucket will appear to be.
    292  double GetPeakBucketSize(const SampleSet& snapshot) const;
    293 
    294  //----------------------------------------------------------------------------
    295  // Table for generating Crc32 values.
    296  static const uint32_t kCrcTable[256];
    297  //----------------------------------------------------------------------------
    298  // Invariant values set at/near construction time
    299 
    300  Sample declared_min_;  // Less than this goes into counts_[0]
    301  Sample declared_max_;  // Over this goes into counts_[bucket_count_ - 1].
    302  size_t bucket_count_;  // Dimension of counts_[].
    303 
    304  // Flag the histogram for recording by UMA via metric_services.h.
    305  Flags flags_;
    306 
    307  // For each index, show the least value that can be stored in the
    308  // corresponding bucket. We also append one extra element in this array,
    309  // containing kSampleType_MAX, to make calculations easy.
    310  // The dimension of ranges_ is bucket_count + 1.
    311  Ranges ranges_;
    312 
    313  // For redundancy, we store a checksum of all the sample ranges when ranges
    314  // are generated.  If ever there is ever a difference, then the histogram must
    315  // have been corrupted.
    316  uint32_t range_checksum_;
    317 
    318  DISALLOW_COPY_AND_ASSIGN(Histogram);
    319 };
    320 
    321 //------------------------------------------------------------------------------
    322 
    323 // LinearHistogram is a more traditional histogram, with evenly spaced
    324 // buckets.
    325 class LinearHistogram : public Histogram {
    326 public:
    327  virtual ~LinearHistogram();
    328 
    329  /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit
    330     default underflow bucket. */
    331  static Histogram* FactoryGet(Sample minimum, Sample maximum,
    332                               size_t bucket_count, Flags flags,
    333                               const int* buckets);
    334 
    335  // Overridden from Histogram:
    336  virtual ClassType histogram_type() const override;
    337 
    338  virtual void Accumulate(Sample value, Count count, size_t index) override;
    339 
    340  // Store a list of number/text values for use in rendering the histogram.
    341  // The last element in the array has a null in its "description" slot.
    342  virtual void SetRangeDescriptions(
    343      const DescriptionPair descriptions[]) override;
    344 
    345 protected:
    346  LinearHistogram(Sample minimum, Sample maximum, size_t bucket_count);
    347 
    348  LinearHistogram(TimeDelta minimum, TimeDelta maximum, size_t bucket_count);
    349 
    350  virtual double GetBucketSize(Count current, size_t i) const override;
    351 
    352  // If we have a description for a bucket, then return that.  Otherwise
    353  // let parent class provide a (numeric) description.
    354  virtual const std::string GetAsciiBucketRange(size_t i) const override;
    355 
    356  // Skip printing of name for numeric range if we have a name (and if this is
    357  // an empty bucket).
    358  virtual bool PrintEmptyBucket(size_t index) const override;
    359 
    360 private:
    361  // For some ranges, we store a printable description of a bucket range.
    362  // If there is no desciption, then GetAsciiBucketRange() uses parent class
    363  // to provide a description.
    364  typedef std::map<Sample, std::string> BucketDescriptionMap;
    365  BucketDescriptionMap bucket_description_;
    366 
    367  DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
    368 };
    369 
    370 //------------------------------------------------------------------------------
    371 
    372 // BooleanHistogram is a histogram for booleans.
    373 class BooleanHistogram : public LinearHistogram {
    374 public:
    375  static Histogram* FactoryGet(Flags flags, const int* buckets);
    376 
    377  virtual ClassType histogram_type() const override;
    378 
    379  virtual void AddBoolean(bool value) override;
    380 
    381  virtual void Accumulate(Sample value, Count count, size_t index) override;
    382 
    383 protected:
    384  explicit BooleanHistogram();
    385 
    386  DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
    387 };
    388 
    389 //------------------------------------------------------------------------------
    390 
    391 // FlagHistogram is like boolean histogram, but only allows a single off/on
    392 // value.
    393 class FlagHistogram : public BooleanHistogram {
    394 public:
    395  static Histogram* FactoryGet(Flags flags, const int* buckets);
    396 
    397  virtual ClassType histogram_type() const override;
    398 
    399  virtual void Accumulate(Sample value, Count count, size_t index) override;
    400 
    401  virtual void AddSampleSet(const SampleSet& sample) override;
    402 
    403  virtual void Clear() override;
    404 
    405 private:
    406  explicit FlagHistogram();
    407  bool mSwitched;
    408 
    409  DISALLOW_COPY_AND_ASSIGN(FlagHistogram);
    410 };
    411 
    412 // CountHistogram only allows a single monotic counter value.
    413 class CountHistogram : public LinearHistogram {
    414 public:
    415  static Histogram* FactoryGet(Flags flags, const int* buckets);
    416 
    417  virtual ClassType histogram_type() const override;
    418 
    419  virtual void Accumulate(Sample value, Count count, size_t index) override;
    420 
    421  virtual void AddSampleSet(const SampleSet& sample) override;
    422 
    423 private:
    424  explicit CountHistogram();
    425 
    426  DISALLOW_COPY_AND_ASSIGN(CountHistogram);
    427 };
    428 
    429 }  // namespace base
    430 
    431 #endif  // BASE_METRICS_HISTOGRAM_H_