chi_square.h (3027B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_RANDOM_INTERNAL_CHI_SQUARE_H_ 16 #define ABSL_RANDOM_INTERNAL_CHI_SQUARE_H_ 17 18 // The chi-square statistic. 19 // 20 // Useful for evaluating if `D` independent random variables are behaving as 21 // expected, or if two distributions are similar. (`D` is the degrees of 22 // freedom). 23 // 24 // Each bucket should have an expected count of 10 or more for the chi square to 25 // be meaningful. 26 27 #include <cassert> 28 29 #include "absl/base/config.h" 30 31 namespace absl { 32 ABSL_NAMESPACE_BEGIN 33 namespace random_internal { 34 35 constexpr const char kChiSquared[] = "chi-squared"; 36 37 // Returns the measured chi square value, using a single expected value. This 38 // assumes that the values in [begin, end) are uniformly distributed. 39 template <typename Iterator> 40 double ChiSquareWithExpected(Iterator begin, Iterator end, double expected) { 41 // Compute the sum and the number of buckets. 42 assert(expected >= 10); // require at least 10 samples per bucket. 43 double chi_square = 0; 44 for (auto it = begin; it != end; it++) { 45 double d = static_cast<double>(*it) - expected; 46 chi_square += d * d; 47 } 48 chi_square = chi_square / expected; 49 return chi_square; 50 } 51 52 // Returns the measured chi square value, taking the actual value of each bucket 53 // from the first set of iterators, and the expected value of each bucket from 54 // the second set of iterators. 55 template <typename Iterator, typename Expected> 56 double ChiSquare(Iterator it, Iterator end, Expected eit, Expected eend) { 57 double chi_square = 0; 58 for (; it != end && eit != eend; ++it, ++eit) { 59 if (*it > 0) { 60 assert(*eit > 0); 61 } 62 double e = static_cast<double>(*eit); 63 double d = static_cast<double>(*it - *eit); 64 if (d != 0) { 65 assert(e > 0); 66 chi_square += (d * d) / e; 67 } 68 } 69 assert(it == end && eit == eend); 70 return chi_square; 71 } 72 73 // ====================================================================== 74 // The following methods can be used for an arbitrary significance level. 75 // 76 77 // Calculates critical chi-square values to produce the given p-value using a 78 // bisection search for a value within epsilon, relying on the monotonicity of 79 // ChiSquarePValue(). 80 double ChiSquareValue(int dof, double p); 81 82 // Calculates the p-value (probability) of a given chi-square value. 83 double ChiSquarePValue(double chi_square, int dof); 84 85 } // namespace random_internal 86 ABSL_NAMESPACE_END 87 } // namespace absl 88 89 #endif // ABSL_RANDOM_INTERNAL_CHI_SQUARE_H_