chi_square.cc (7192B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/random/internal/chi_square.h" 16 17 #include <cmath> 18 19 #include "absl/random/internal/distribution_test_util.h" 20 21 namespace absl { 22 ABSL_NAMESPACE_BEGIN 23 namespace random_internal { 24 namespace { 25 26 #if defined(__EMSCRIPTEN__) 27 // Workaround __EMSCRIPTEN__ error: llvm_fma_f64 not found. 28 inline double fma(double x, double y, double z) { return (x * y) + z; } 29 #endif 30 31 // Use Horner's method to evaluate a polynomial. 32 template <typename T, unsigned N> 33 inline T EvaluatePolynomial(T x, const T (&poly)[N]) { 34 #if !defined(__EMSCRIPTEN__) 35 using std::fma; 36 #endif 37 T p = poly[N - 1]; 38 for (unsigned i = 2; i <= N; i++) { 39 p = fma(p, x, poly[N - i]); 40 } 41 return p; 42 } 43 44 static constexpr int kLargeDOF = 150; 45 46 // Returns the probability of a normal z-value. 47 // 48 // Adapted from the POZ function in: 49 // Ibbetson D, Algorithm 209 50 // Collected Algorithms of the CACM 1963 p. 616 51 // 52 double POZ(double z) { 53 static constexpr double kP1[] = { 54 0.797884560593, -0.531923007300, 0.319152932694, 55 -0.151968751364, 0.059054035642, -0.019198292004, 56 0.005198775019, -0.001075204047, 0.000124818987, 57 }; 58 static constexpr double kP2[] = { 59 0.999936657524, 0.000535310849, -0.002141268741, 0.005353579108, 60 -0.009279453341, 0.011630447319, -0.010557625006, 0.006549791214, 61 -0.002034254874, -0.000794620820, 0.001390604284, -0.000676904986, 62 -0.000019538132, 0.000152529290, -0.000045255659, 63 }; 64 65 const double kZMax = 6.0; // Maximum meaningful z-value. 66 if (z == 0.0) { 67 return 0.5; 68 } 69 double x; 70 double y = 0.5 * std::fabs(z); 71 if (y >= (kZMax * 0.5)) { 72 x = 1.0; 73 } else if (y < 1.0) { 74 double w = y * y; 75 x = EvaluatePolynomial(w, kP1) * y * 2.0; 76 } else { 77 y -= 2.0; 78 x = EvaluatePolynomial(y, kP2); 79 } 80 return z > 0.0 ? ((x + 1.0) * 0.5) : ((1.0 - x) * 0.5); 81 } 82 83 // Approximates the survival function of the normal distribution. 84 // 85 // Algorithm 26.2.18, from: 86 // [Abramowitz and Stegun, Handbook of Mathematical Functions,p.932] 87 // http://people.math.sfu.ca/~cbm/aands/abramowitz_and_stegun.pdf 88 // 89 double normal_survival(double z) { 90 // Maybe replace with the alternate formulation. 91 // 0.5 * erfc((x - mean)/(sqrt(2) * sigma)) 92 static constexpr double kR[] = { 93 1.0, 0.196854, 0.115194, 0.000344, 0.019527, 94 }; 95 double r = EvaluatePolynomial(z, kR); 96 r *= r; 97 return 0.5 / (r * r); 98 } 99 100 } // namespace 101 102 // Calculates the critical chi-square value given degrees-of-freedom and a 103 // p-value, usually using bisection. Also known by the name CRITCHI. 104 double ChiSquareValue(int dof, double p) { 105 static constexpr double kChiEpsilon = 106 0.000001; // Accuracy of the approximation. 107 static constexpr double kChiMax = 99999.0; // Maximum chi-squared value. 108 109 const double p_value = 1.0 - p; 110 if (dof < 1 || p_value > 1.0) { 111 return 0.0; 112 } 113 114 if (dof > kLargeDOF) { 115 // For large degrees of freedom, use the normal approximation by 116 // Wilson, E. B. and Hilferty, M. M. (1931) 117 // chi^2 - mean 118 // Z = -------------- 119 // stddev 120 const double z = InverseNormalSurvival(p_value); 121 const double mean = 1 - 2.0 / (9 * dof); 122 const double variance = 2.0 / (9 * dof); 123 // Cannot use this method if the variance is 0. 124 if (variance != 0) { 125 double term = z * std::sqrt(variance) + mean; 126 return dof * (term * term * term); 127 } 128 } 129 130 if (p_value <= 0.0) return kChiMax; 131 132 // Otherwise search for the p value by bisection 133 double min_chisq = 0.0; 134 double max_chisq = kChiMax; 135 double current = dof / std::sqrt(p_value); 136 while ((max_chisq - min_chisq) > kChiEpsilon) { 137 if (ChiSquarePValue(current, dof) < p_value) { 138 max_chisq = current; 139 } else { 140 min_chisq = current; 141 } 142 current = (max_chisq + min_chisq) * 0.5; 143 } 144 return current; 145 } 146 147 // Calculates the p-value (probability) of a given chi-square value 148 // and degrees of freedom. 149 // 150 // Adapted from the POCHISQ function from: 151 // Hill, I. D. and Pike, M. C. Algorithm 299 152 // Collected Algorithms of the CACM 1963 p. 243 153 // 154 double ChiSquarePValue(double chi_square, int dof) { 155 static constexpr double kLogSqrtPi = 156 0.5723649429247000870717135; // Log[Sqrt[Pi]] 157 static constexpr double kInverseSqrtPi = 158 0.5641895835477562869480795; // 1/(Sqrt[Pi]) 159 160 // For large degrees of freedom, use the normal approximation by 161 // Wilson, E. B. and Hilferty, M. M. (1931) 162 // Via Wikipedia: 163 // By the Central Limit Theorem, because the chi-square distribution is the 164 // sum of k independent random variables with finite mean and variance, it 165 // converges to a normal distribution for large k. 166 if (dof > kLargeDOF) { 167 // Re-scale everything. 168 const double chi_square_scaled = std::pow(chi_square / dof, 1.0 / 3); 169 const double mean = 1 - 2.0 / (9 * dof); 170 const double variance = 2.0 / (9 * dof); 171 // If variance is 0, this method cannot be used. 172 if (variance != 0) { 173 const double z = (chi_square_scaled - mean) / std::sqrt(variance); 174 if (z > 0) { 175 return normal_survival(z); 176 } else if (z < 0) { 177 return 1.0 - normal_survival(-z); 178 } else { 179 return 0.5; 180 } 181 } 182 } 183 184 // The chi square function is >= 0 for any degrees of freedom. 185 // In other words, probability that the chi square function >= 0 is 1. 186 if (chi_square <= 0.0) return 1.0; 187 188 // If the degrees of freedom is zero, the chi square function is always 0 by 189 // definition. In other words, the probability that the chi square function 190 // is > 0 is zero (chi square values <= 0 have been filtered above). 191 if (dof < 1) return 0; 192 193 auto capped_exp = [](double x) { return x < -20 ? 0.0 : std::exp(x); }; 194 static constexpr double kBigX = 20; 195 196 double a = 0.5 * chi_square; 197 const bool even = !(dof & 1); // True if dof is an even number. 198 const double y = capped_exp(-a); 199 double s = even ? y : (2.0 * POZ(-std::sqrt(chi_square))); 200 201 if (dof <= 2) { 202 return s; 203 } 204 205 chi_square = 0.5 * (dof - 1.0); 206 double z = (even ? 1.0 : 0.5); 207 if (a > kBigX) { 208 double e = (even ? 0.0 : kLogSqrtPi); 209 double c = std::log(a); 210 while (z <= chi_square) { 211 e = std::log(z) + e; 212 s += capped_exp(c * z - a - e); 213 z += 1.0; 214 } 215 return s; 216 } 217 218 double e = (even ? 1.0 : (kInverseSqrtPi / std::sqrt(a))); 219 double c = 0.0; 220 while (z <= chi_square) { 221 e = e * (a / z); 222 c = c + e; 223 z += 1.0; 224 } 225 return c * y + s; 226 } 227 228 } // namespace random_internal 229 ABSL_NAMESPACE_END 230 } // namespace absl