charconv_bigint.h (15130B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ 16 #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ 17 18 #include <algorithm> 19 #include <cstdint> 20 #include <ostream> 21 #include <string> 22 23 #include "absl/base/config.h" 24 #include "absl/strings/ascii.h" 25 #include "absl/strings/internal/charconv_parse.h" 26 #include "absl/strings/string_view.h" 27 28 namespace absl { 29 ABSL_NAMESPACE_BEGIN 30 namespace strings_internal { 31 32 // The largest power that 5 that can be raised to, and still fit in a uint32_t. 33 constexpr int kMaxSmallPowerOfFive = 13; 34 // The largest power that 10 that can be raised to, and still fit in a uint32_t. 35 constexpr int kMaxSmallPowerOfTen = 9; 36 37 ABSL_DLL extern const uint32_t 38 kFiveToNth[kMaxSmallPowerOfFive + 1]; 39 ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; 40 41 // Large, fixed-width unsigned integer. 42 // 43 // Exact rounding for decimal-to-binary floating point conversion requires very 44 // large integer math, but a design goal of absl::from_chars is to avoid 45 // allocating memory. The integer precision needed for decimal-to-binary 46 // conversions is large but bounded, so a huge fixed-width integer class 47 // suffices. 48 // 49 // This is an intentionally limited big integer class. Only needed operations 50 // are implemented. All storage lives in an array data member, and all 51 // arithmetic is done in-place, to avoid requiring separate storage for operand 52 // and result. 53 // 54 // This is an internal class. Some methods live in the .cc file, and are 55 // instantiated only for the values of max_words we need. 56 template <int max_words> 57 class BigUnsigned { 58 public: 59 static_assert(max_words == 4 || max_words == 84, 60 "unsupported max_words value"); 61 62 BigUnsigned() : size_(0), words_{} {} 63 explicit constexpr BigUnsigned(uint64_t v) 64 : size_((v >> 32) ? 2 : v ? 1 : 0), 65 words_{static_cast<uint32_t>(v & 0xffffffffu), 66 static_cast<uint32_t>(v >> 32)} {} 67 68 // Constructs a BigUnsigned from the given string_view containing a decimal 69 // value. If the input string is not a decimal integer, constructs a 0 70 // instead. 71 explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { 72 // Check for valid input, returning a 0 otherwise. This is reasonable 73 // behavior only because this constructor is for unit tests. 74 if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || 75 sv.empty()) { 76 return; 77 } 78 int exponent_adjust = 79 ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); 80 if (exponent_adjust > 0) { 81 MultiplyByTenToTheNth(exponent_adjust); 82 } 83 } 84 85 // Loads the mantissa value of a previously-parsed float. 86 // 87 // Returns the associated decimal exponent. The value of the parsed float is 88 // exactly *this * 10**exponent. 89 int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); 90 91 // Returns the number of decimal digits of precision this type provides. All 92 // numbers with this many decimal digits or fewer are representable by this 93 // type. 94 // 95 // Analogous to std::numeric_limits<BigUnsigned>::digits10. 96 static constexpr int Digits10() { 97 // 9975007/1035508 is very slightly less than log10(2**32). 98 return static_cast<uint64_t>(max_words) * 9975007 / 1035508; 99 } 100 101 // Shifts left by the given number of bits. 102 void ShiftLeft(int count) { 103 if (count > 0) { 104 const int word_shift = count / 32; 105 if (word_shift >= max_words) { 106 SetToZero(); 107 return; 108 } 109 size_ = (std::min)(size_ + word_shift, max_words); 110 count %= 32; 111 if (count == 0) { 112 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=warray-bounds 113 // shows a lot of bogus -Warray-bounds warnings under GCC. 114 // This is not the only one in Abseil. 115 #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0) 116 #pragma GCC diagnostic push 117 #pragma GCC diagnostic ignored "-Warray-bounds" 118 #endif 119 std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); 120 #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0) 121 #pragma GCC diagnostic pop 122 #endif 123 } else { 124 for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { 125 words_[i] = (words_[i - word_shift] << count) | 126 (words_[i - word_shift - 1] >> (32 - count)); 127 } 128 words_[word_shift] = words_[0] << count; 129 // Grow size_ if necessary. 130 if (size_ < max_words && words_[size_]) { 131 ++size_; 132 } 133 } 134 std::fill_n(words_, word_shift, 0u); 135 } 136 } 137 138 139 // Multiplies by v in-place. 140 void MultiplyBy(uint32_t v) { 141 if (size_ == 0 || v == 1) { 142 return; 143 } 144 if (v == 0) { 145 SetToZero(); 146 return; 147 } 148 const uint64_t factor = v; 149 uint64_t window = 0; 150 for (int i = 0; i < size_; ++i) { 151 window += factor * words_[i]; 152 words_[i] = window & 0xffffffff; 153 window >>= 32; 154 } 155 // If carry bits remain and there's space for them, grow size_. 156 if (window && size_ < max_words) { 157 words_[size_] = window & 0xffffffff; 158 ++size_; 159 } 160 } 161 162 void MultiplyBy(uint64_t v) { 163 uint32_t words[2]; 164 words[0] = static_cast<uint32_t>(v); 165 words[1] = static_cast<uint32_t>(v >> 32); 166 if (words[1] == 0) { 167 MultiplyBy(words[0]); 168 } else { 169 MultiplyBy(2, words); 170 } 171 } 172 173 // Multiplies in place by 5 to the power of n. n must be non-negative. 174 void MultiplyByFiveToTheNth(int n) { 175 while (n >= kMaxSmallPowerOfFive) { 176 MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); 177 n -= kMaxSmallPowerOfFive; 178 } 179 if (n > 0) { 180 MultiplyBy(kFiveToNth[n]); 181 } 182 } 183 184 // Multiplies in place by 10 to the power of n. n must be non-negative. 185 void MultiplyByTenToTheNth(int n) { 186 if (n > kMaxSmallPowerOfTen) { 187 // For large n, raise to a power of 5, then shift left by the same amount. 188 // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. 189 MultiplyByFiveToTheNth(n); 190 ShiftLeft(n); 191 } else if (n > 0) { 192 // We can do this more quickly for very small N by using a single 193 // multiplication. 194 MultiplyBy(kTenToNth[n]); 195 } 196 } 197 198 // Returns the value of 5**n, for non-negative n. This implementation uses 199 // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling 200 // MultiplyByFiveToTheNth(). 201 static BigUnsigned FiveToTheNth(int n); 202 203 // Multiplies by another BigUnsigned, in-place. 204 template <int M> 205 void MultiplyBy(const BigUnsigned<M>& other) { 206 MultiplyBy(other.size(), other.words()); 207 } 208 209 void SetToZero() { 210 std::fill_n(words_, size_, 0u); 211 size_ = 0; 212 } 213 214 // Returns the value of the nth word of this BigUnsigned. This is 215 // range-checked, and returns 0 on out-of-bounds accesses. 216 uint32_t GetWord(int index) const { 217 if (index < 0 || index >= size_) { 218 return 0; 219 } 220 return words_[index]; 221 } 222 223 // Returns this integer as a decimal string. This is not used in the decimal- 224 // to-binary conversion; it is intended to aid in testing. 225 std::string ToString() const; 226 227 int size() const { return size_; } 228 const uint32_t* words() const { return words_; } 229 230 private: 231 // Reads the number between [begin, end), possibly containing a decimal point, 232 // into this BigUnsigned. 233 // 234 // Callers are required to ensure [begin, end) contains a valid number, with 235 // one or more decimal digits and at most one decimal point. This routine 236 // will behave unpredictably if these preconditions are not met. 237 // 238 // Only the first `significant_digits` digits are read. Digits beyond this 239 // limit are "sticky": If the final significant digit is 0 or 5, and if any 240 // dropped digit is nonzero, then that final significant digit is adjusted up 241 // to 1 or 6. This adjustment allows for precise rounding. 242 // 243 // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to 244 // account for the decimal point and for dropped significant digits. After 245 // this function returns, 246 // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. 247 int ReadDigits(const char* begin, const char* end, int significant_digits); 248 249 // Performs a step of big integer multiplication. This computes the full 250 // (64-bit-wide) values that should be added at the given index (step), and 251 // adds to that location in-place. 252 // 253 // Because our math all occurs in place, we must multiply starting from the 254 // highest word working downward. (This is a bit more expensive due to the 255 // extra carries involved.) 256 // 257 // This must be called in steps, for each word to be calculated, starting from 258 // the high end and working down to 0. The first value of `step` should be 259 // `std::min(original_size + other.size_ - 2, max_words - 1)`. 260 // The reason for this expression is that multiplying the i'th word from one 261 // multiplicand and the j'th word of another multiplicand creates a 262 // two-word-wide value to be stored at the (i+j)'th element. The highest 263 // word indices we will access are `original_size - 1` from this object, and 264 // `other.size_ - 1` from our operand. Therefore, 265 // `original_size + other.size_ - 2` is the first step we should calculate, 266 // but limited on an upper bound by max_words. 267 268 // Working from high-to-low ensures that we do not overwrite the portions of 269 // the initial value of *this which are still needed for later steps. 270 // 271 // Once called with step == 0, *this contains the result of the 272 // multiplication. 273 // 274 // `original_size` is the size_ of *this before the first call to 275 // MultiplyStep(). `other_words` and `other_size` are the contents of our 276 // operand. `step` is the step to perform, as described above. 277 void MultiplyStep(int original_size, const uint32_t* other_words, 278 int other_size, int step); 279 280 void MultiplyBy(int other_size, const uint32_t* other_words) { 281 const int original_size = size_; 282 const int first_step = 283 (std::min)(original_size + other_size - 2, max_words - 1); 284 for (int step = first_step; step >= 0; --step) { 285 MultiplyStep(original_size, other_words, other_size, step); 286 } 287 } 288 289 // Adds a 32-bit value to the index'th word, with carry. 290 void AddWithCarry(int index, uint32_t value) { 291 if (value) { 292 while (index < max_words && value > 0) { 293 words_[index] += value; 294 // carry if we overflowed in this word: 295 if (value > words_[index]) { 296 value = 1; 297 ++index; 298 } else { 299 value = 0; 300 } 301 } 302 size_ = (std::min)(max_words, (std::max)(index + 1, size_)); 303 } 304 } 305 306 void AddWithCarry(int index, uint64_t value) { 307 if (value && index < max_words) { 308 uint32_t high = value >> 32; 309 uint32_t low = value & 0xffffffff; 310 words_[index] += low; 311 if (words_[index] < low) { 312 ++high; 313 if (high == 0) { 314 // Carry from the low word caused our high word to overflow. 315 // Short circuit here to do the right thing. 316 AddWithCarry(index + 2, static_cast<uint32_t>(1)); 317 return; 318 } 319 } 320 if (high > 0) { 321 AddWithCarry(index + 1, high); 322 } else { 323 // Normally 32-bit AddWithCarry() sets size_, but since we don't call 324 // it when `high` is 0, do it ourselves here. 325 size_ = (std::min)(max_words, (std::max)(index + 1, size_)); 326 } 327 } 328 } 329 330 // Divide this in place by a constant divisor. Returns the remainder of the 331 // division. 332 template <uint32_t divisor> 333 uint32_t DivMod() { 334 uint64_t accumulator = 0; 335 for (int i = size_ - 1; i >= 0; --i) { 336 accumulator <<= 32; 337 accumulator += words_[i]; 338 // accumulator / divisor will never overflow an int32_t in this loop 339 words_[i] = static_cast<uint32_t>(accumulator / divisor); 340 accumulator = accumulator % divisor; 341 } 342 while (size_ > 0 && words_[size_ - 1] == 0) { 343 --size_; 344 } 345 return static_cast<uint32_t>(accumulator); 346 } 347 348 // The number of elements in words_ that may carry significant values. 349 // All elements beyond this point are 0. 350 // 351 // When size_ is 0, this BigUnsigned stores the value 0. 352 // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is 353 // nonzero. This can occur due to overflow truncation. 354 // In particular, x.size_ != y.size_ does *not* imply x != y. 355 int size_; 356 uint32_t words_[max_words]; 357 }; 358 359 // Compares two big integer instances. 360 // 361 // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. 362 template <int N, int M> 363 int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 364 int limit = (std::max)(lhs.size(), rhs.size()); 365 for (int i = limit - 1; i >= 0; --i) { 366 const uint32_t lhs_word = lhs.GetWord(i); 367 const uint32_t rhs_word = rhs.GetWord(i); 368 if (lhs_word < rhs_word) { 369 return -1; 370 } else if (lhs_word > rhs_word) { 371 return 1; 372 } 373 } 374 return 0; 375 } 376 377 template <int N, int M> 378 bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 379 int limit = (std::max)(lhs.size(), rhs.size()); 380 for (int i = 0; i < limit; ++i) { 381 if (lhs.GetWord(i) != rhs.GetWord(i)) { 382 return false; 383 } 384 } 385 return true; 386 } 387 388 template <int N, int M> 389 bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 390 return !(lhs == rhs); 391 } 392 393 template <int N, int M> 394 bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 395 return Compare(lhs, rhs) == -1; 396 } 397 398 template <int N, int M> 399 bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 400 return rhs < lhs; 401 } 402 template <int N, int M> 403 bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 404 return !(rhs < lhs); 405 } 406 template <int N, int M> 407 bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { 408 return !(lhs < rhs); 409 } 410 411 // Output operator for BigUnsigned, for testing purposes only. 412 template <int N> 413 std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { 414 return os << num.ToString(); 415 } 416 417 // Explicit instantiation declarations for the sizes of BigUnsigned that we 418 // are using. 419 // 420 // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is 421 // still bigger than an int128, and 84 is a large value we will want to use 422 // in the from_chars implementation. 423 // 424 // Comments justifying the use of 84 belong in the from_chars implementation, 425 // and will be added in a follow-up CL. 426 extern template class BigUnsigned<4>; 427 extern template class BigUnsigned<84>; 428 429 } // namespace strings_internal 430 ABSL_NAMESPACE_END 431 } // namespace absl 432 433 #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_