numbers.h (14660B)
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: numbers.h
// -----------------------------------------------------------------------------
//
// This package contains functions for converting strings to numbers. For
// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
// which automatically detect and convert most number values appropriately.

#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_

// SSSE3 intrinsics; used by the vectorized path of
// numbers_internal::FastHexToBufferZeroPad16() further down in this header.
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/base/port.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

// SimpleAtoi()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into an integer value, returning `true` if successful. The string
// must reflect a base-10 integer whose value falls within the range of the
// integer type (optionally preceded by a `+` or `-`). If any errors are
// encountered, this function returns `false`, leaving `out` in an unspecified
// state.
//
// The template is defined in the implementation section of this header and
// accepts 8, 16, 32, and 64-bit integer types; floating-point types are
// rejected at compile time (use SimpleAtof() or SimpleAtod() instead).
// Non-template overloads for `absl::int128` and `absl::uint128` are defined
// alongside it.
template <typename int_type>
[[nodiscard]] bool SimpleAtoi(absl::string_view str,
                              absl::Nonnull<int_type*> out);

// SimpleAtof()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtof() is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
[[nodiscard]] bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out);

// SimpleAtod()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtod() is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
[[nodiscard]] bool SimpleAtod(absl::string_view str,
                              absl::Nonnull<double*> out);

// SimpleAtob()
//
// Converts the given string into a boolean, returning `true` if successful.
// The following case-insensitive strings are interpreted as boolean `true`:
// "true", "t", "yes", "y", "1". The following case-insensitive strings
// are interpreted as boolean `false`: "false", "f", "no", "n", "0".
If any 97 // errors are encountered, this function returns `false`, leaving `out` in an 98 // unspecified state. 99 [[nodiscard]] bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out); 100 101 // SimpleHexAtoi() 102 // 103 // Converts a hexadecimal string (optionally followed or preceded by ASCII 104 // whitespace) to an integer, returning `true` if successful. Only valid base-16 105 // hexadecimal integers whose value falls within the range of the integer type 106 // (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal 107 // value may include both upper and lowercase character symbols, and may 108 // optionally include a leading "0x" (or "0X") number prefix, which is ignored 109 // by this function. If any errors are encountered, this function returns 110 // `false`, leaving `out` in an unspecified state. 111 template <typename int_type> 112 [[nodiscard]] bool SimpleHexAtoi(absl::string_view str, 113 absl::Nonnull<int_type*> out); 114 115 // Overloads of SimpleHexAtoi() for 128 bit integers. 116 [[nodiscard]] inline bool SimpleHexAtoi(absl::string_view str, 117 absl::Nonnull<absl::int128*> out); 118 [[nodiscard]] inline bool SimpleHexAtoi(absl::string_view str, 119 absl::Nonnull<absl::uint128*> out); 120 121 ABSL_NAMESPACE_END 122 } // namespace absl 123 124 // End of public API. Implementation details follow. 125 126 namespace absl { 127 ABSL_NAMESPACE_BEGIN 128 namespace numbers_internal { 129 130 template <typename int_type> 131 constexpr bool is_signed() { 132 if constexpr (std::is_arithmetic<int_type>::value) { 133 // Use std::numeric_limits<T>::is_signed where it's defined to work. 134 return std::numeric_limits<int_type>::is_signed; 135 } 136 // TODO(jorg): This signed-ness check is used because it works correctly 137 // with enums, and it also serves to check that int_type is not a pointer. 138 // If one day something like std::is_signed<enum E> works, switch to it. 
139 return static_cast<int_type>(1) - 2 < 0; 140 } 141 142 // Digit conversion. 143 ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef 144 ABSL_DLL extern const char 145 kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... 146 147 // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the 148 // range 0 <= i < 100, and buf must have space for two characters. Example: 149 // char buf[2]; 150 // PutTwoDigits(42, buf); 151 // // buf[0] == '4' 152 // // buf[1] == '2' 153 void PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf); 154 155 // safe_strto?() functions for implementing SimpleAtoi() 156 157 bool safe_strto8_base(absl::string_view text, absl::Nonnull<int8_t*> value, 158 int base); 159 bool safe_strto16_base(absl::string_view text, absl::Nonnull<int16_t*> value, 160 int base); 161 bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value, 162 int base); 163 bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value, 164 int base); 165 bool safe_strto128_base(absl::string_view text, 166 absl::Nonnull<absl::int128*> value, int base); 167 bool safe_strtou8_base(absl::string_view text, absl::Nonnull<uint8_t*> value, 168 int base); 169 bool safe_strtou16_base(absl::string_view text, absl::Nonnull<uint16_t*> value, 170 int base); 171 bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value, 172 int base); 173 bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value, 174 int base); 175 bool safe_strtou128_base(absl::string_view text, 176 absl::Nonnull<absl::uint128*> value, int base); 177 178 static const int kFastToBufferSize = 32; 179 static const int kSixDigitsToBufferSize = 16; 180 181 // Helper function for fast formatting of floating-point values. 182 // The result is the same as printf's "%g", a.k.a. 
"%.6g"; that is, six 183 // significant digits are returned, trailing zeros are removed, and numbers 184 // outside the range 0.0001-999999 are output using scientific notation 185 // (1.23456e+06). This routine is heavily optimized. 186 // Required buffer size is `kSixDigitsToBufferSize`. 187 size_t SixDigitsToBuffer(double d, absl::Nonnull<char*> buffer); 188 189 // WARNING: These functions may write more characters than necessary, because 190 // they are intended for speed. All functions take an output buffer 191 // as an argument and return a pointer to the last byte they wrote, which is the 192 // terminating '\0'. At most `kFastToBufferSize` bytes are written. 193 absl::Nonnull<char*> FastIntToBuffer(int32_t i, absl::Nonnull<char*> buffer) 194 ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); 195 absl::Nonnull<char*> FastIntToBuffer(uint32_t n, absl::Nonnull<char*> out_str) 196 ABSL_INTERNAL_NEED_MIN_SIZE(out_str, kFastToBufferSize); 197 absl::Nonnull<char*> FastIntToBuffer(int64_t i, absl::Nonnull<char*> buffer) 198 ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); 199 absl::Nonnull<char*> FastIntToBuffer(uint64_t i, absl::Nonnull<char*> buffer) 200 ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize); 201 202 // For enums and integer types that are not an exact match for the types above, 203 // use templates to call the appropriate one of the four overloads above. 204 template <typename int_type> 205 absl::Nonnull<char*> FastIntToBuffer(int_type i, absl::Nonnull<char*> buffer) 206 ABSL_INTERNAL_NEED_MIN_SIZE(buffer, kFastToBufferSize) { 207 static_assert(sizeof(i) <= 64 / 8, 208 "FastIntToBuffer works only with 64-bit-or-less integers."); 209 // These conditions are constexpr bools to suppress MSVC warning C4127. 
210 constexpr bool kIsSigned = is_signed<int_type>(); 211 constexpr bool kUse64Bit = sizeof(i) > 32 / 8; 212 if (kIsSigned) { 213 if (kUse64Bit) { 214 return FastIntToBuffer(static_cast<int64_t>(i), buffer); 215 } else { 216 return FastIntToBuffer(static_cast<int32_t>(i), buffer); 217 } 218 } else { 219 if (kUse64Bit) { 220 return FastIntToBuffer(static_cast<uint64_t>(i), buffer); 221 } else { 222 return FastIntToBuffer(static_cast<uint32_t>(i), buffer); 223 } 224 } 225 } 226 227 // Implementation of SimpleAtoi, generalized to support arbitrary base (used 228 // with base different from 10 elsewhere in Abseil implementation). 229 template <typename int_type> 230 [[nodiscard]] bool safe_strtoi_base(absl::string_view s, 231 absl::Nonnull<int_type*> out, int base) { 232 static_assert(sizeof(*out) == 1 || sizeof(*out) == 2 || sizeof(*out) == 4 || 233 sizeof(*out) == 8, 234 "SimpleAtoi works only with 8, 16, 32, or 64-bit integers."); 235 static_assert(!std::is_floating_point<int_type>::value, 236 "Use SimpleAtof or SimpleAtod instead."); 237 bool parsed; 238 // These conditions are constexpr bools to suppress MSVC warning C4127. 
239 constexpr bool kIsSigned = is_signed<int_type>(); 240 constexpr int kIntTypeSize = sizeof(*out) * 8; 241 if (kIsSigned) { 242 if (kIntTypeSize == 64) { 243 int64_t val; 244 parsed = numbers_internal::safe_strto64_base(s, &val, base); 245 *out = static_cast<int_type>(val); 246 } else if (kIntTypeSize == 32) { 247 int32_t val; 248 parsed = numbers_internal::safe_strto32_base(s, &val, base); 249 *out = static_cast<int_type>(val); 250 } else if (kIntTypeSize == 16) { 251 int16_t val; 252 parsed = numbers_internal::safe_strto16_base(s, &val, base); 253 *out = static_cast<int_type>(val); 254 } else if (kIntTypeSize == 8) { 255 int8_t val; 256 parsed = numbers_internal::safe_strto8_base(s, &val, base); 257 *out = static_cast<int_type>(val); 258 } 259 } else { 260 if (kIntTypeSize == 64) { 261 uint64_t val; 262 parsed = numbers_internal::safe_strtou64_base(s, &val, base); 263 *out = static_cast<int_type>(val); 264 } else if (kIntTypeSize == 32) { 265 uint32_t val; 266 parsed = numbers_internal::safe_strtou32_base(s, &val, base); 267 *out = static_cast<int_type>(val); 268 } else if (kIntTypeSize == 16) { 269 uint16_t val; 270 parsed = numbers_internal::safe_strtou16_base(s, &val, base); 271 *out = static_cast<int_type>(val); 272 } else if (kIntTypeSize == 8) { 273 uint8_t val; 274 parsed = numbers_internal::safe_strtou8_base(s, &val, base); 275 *out = static_cast<int_type>(val); 276 } 277 } 278 return parsed; 279 } 280 281 // FastHexToBufferZeroPad16() 282 // 283 // Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but 284 // without the terminating null character. Thus `out` must be of length >= 16. 285 // Returns the number of non-pad digits of the output (it can never be zero 286 // since 0 has one digit). 
287 inline size_t FastHexToBufferZeroPad16(uint64_t val, absl::Nonnull<char*> out) { 288 #ifdef ABSL_INTERNAL_HAVE_SSSE3 289 uint64_t be = absl::big_endian::FromHost64(val); 290 const auto kNibbleMask = _mm_set1_epi8(0xf); 291 const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', 292 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'); 293 auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be)); // load lo dword 294 auto v4 = _mm_srli_epi64(v, 4); // shift 4 right 295 auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes 296 auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles 297 auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars 298 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars); 299 #else 300 for (int i = 0; i < 8; ++i) { 301 auto byte = (val >> (56 - 8 * i)) & 0xFF; 302 auto* hex = &absl::numbers_internal::kHexTable[byte * 2]; 303 std::memcpy(out + 2 * i, hex, 2); 304 } 305 #endif 306 // | 0x1 so that even 0 has 1 digit. 307 return 16 - static_cast<size_t>(countl_zero(val | 0x1) / 4); 308 } 309 310 } // namespace numbers_internal 311 312 template <typename int_type> 313 [[nodiscard]] bool SimpleAtoi(absl::string_view str, 314 absl::Nonnull<int_type*> out) { 315 return numbers_internal::safe_strtoi_base(str, out, 10); 316 } 317 318 [[nodiscard]] inline bool SimpleAtoi(absl::string_view str, 319 absl::Nonnull<absl::int128*> out) { 320 return numbers_internal::safe_strto128_base(str, out, 10); 321 } 322 323 [[nodiscard]] inline bool SimpleAtoi(absl::string_view str, 324 absl::Nonnull<absl::uint128*> out) { 325 return numbers_internal::safe_strtou128_base(str, out, 10); 326 } 327 328 template <typename int_type> 329 [[nodiscard]] bool SimpleHexAtoi(absl::string_view str, 330 absl::Nonnull<int_type*> out) { 331 return numbers_internal::safe_strtoi_base(str, out, 16); 332 } 333 334 [[nodiscard]] inline bool SimpleHexAtoi(absl::string_view str, 335 absl::Nonnull<absl::int128*> out) { 336 return 
numbers_internal::safe_strto128_base(str, out, 16); 337 } 338 339 [[nodiscard]] inline bool SimpleHexAtoi(absl::string_view str, 340 absl::Nonnull<absl::uint128*> out) { 341 return numbers_internal::safe_strtou128_base(str, out, 16); 342 } 343 344 ABSL_NAMESPACE_END 345 } // namespace absl 346 347 #endif // ABSL_STRINGS_NUMBERS_H_