charconv_parse_test.cc (16872B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/strings/internal/charconv_parse.h" 16 17 #include <string> 18 #include <utility> 19 20 #include "gmock/gmock.h" 21 #include "gtest/gtest.h" 22 #include "absl/log/check.h" 23 #include "absl/strings/str_cat.h" 24 25 using absl::chars_format; 26 using absl::strings_internal::FloatType; 27 using absl::strings_internal::ParsedFloat; 28 using absl::strings_internal::ParseFloat; 29 30 namespace { 31 32 // Check that a given string input is parsed to the expected mantissa and 33 // exponent. 34 // 35 // Input string `s` must contain a '$' character. It marks the end of the 36 // characters that should be consumed by the match. It is stripped from the 37 // input to ParseFloat. 38 // 39 // If input string `s` contains '[' and ']' characters, these mark the region 40 // of characters that should be marked as the "subrange". For NaNs, this is 41 // the location of the extended NaN string. For numbers, this is the location 42 // of the full, over-large mantissa. 43 template <int base> 44 void ExpectParsedFloat(std::string s, absl::chars_format format_flags, 45 FloatType expected_type, uint64_t expected_mantissa, 46 int expected_exponent, 47 int expected_literal_exponent = -999) { 48 SCOPED_TRACE(s); 49 50 int begin_subrange = -1; 51 int end_subrange = -1; 52 // If s contains '[' and ']', then strip these characters and set the subrange 53 // indices appropriately. 54 std::string::size_type open_bracket_pos = s.find('['); 55 if (open_bracket_pos != std::string::npos) { 56 begin_subrange = static_cast<int>(open_bracket_pos); 57 s.replace(open_bracket_pos, 1, ""); 58 std::string::size_type close_bracket_pos = s.find(']'); 59 CHECK_NE(close_bracket_pos, absl::string_view::npos) 60 << "Test input contains [ without matching ]"; 61 end_subrange = static_cast<int>(close_bracket_pos); 62 s.replace(close_bracket_pos, 1, ""); 63 } 64 const std::string::size_type expected_characters_matched = s.find('$'); 65 CHECK_NE(expected_characters_matched, std::string::npos) 66 << "Input string must contain $"; 67 s.replace(expected_characters_matched, 1, ""); 68 69 ParsedFloat parsed = 70 ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); 71 72 EXPECT_NE(parsed.end, nullptr); 73 if (parsed.end == nullptr) { 74 return; // The following tests are not useful if we fully failed to parse 75 } 76 EXPECT_EQ(parsed.type, expected_type); 77 if (begin_subrange == -1) { 78 EXPECT_EQ(parsed.subrange_begin, nullptr); 79 EXPECT_EQ(parsed.subrange_end, nullptr); 80 } else { 81 EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); 82 EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange); 83 } 84 if (parsed.type == FloatType::kNumber) { 85 EXPECT_EQ(parsed.mantissa, expected_mantissa); 86 EXPECT_EQ(parsed.exponent, expected_exponent); 87 if (expected_literal_exponent != -999) { 88 EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); 89 } 90 } 91 auto characters_matched = static_cast<int>(parsed.end - s.data()); 92 EXPECT_EQ(characters_matched, expected_characters_matched); 93 } 94 95 // Check that a given string input is parsed to the expected mantissa and 96 // exponent. 97 // 98 // Input string `s` must contain a '$' character. It marks the end of the 99 // characters that were consumed by the match. 100 template <int base> 101 void ExpectNumber(std::string s, absl::chars_format format_flags, 102 uint64_t expected_mantissa, int expected_exponent, 103 int expected_literal_exponent = -999) { 104 ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber, 105 expected_mantissa, expected_exponent, 106 expected_literal_exponent); 107 } 108 109 // Check that a given string input is parsed to the given special value. 110 // 111 // This tests against both number bases, since infinities and NaNs have 112 // identical representations in both modes. 113 void ExpectSpecial(const std::string& s, absl::chars_format format_flags, 114 FloatType type) { 115 ExpectParsedFloat<10>(s, format_flags, type, 0, 0); 116 ExpectParsedFloat<16>(s, format_flags, type, 0, 0); 117 } 118 119 // Check that a given input string is not matched by Float. 120 template <int base> 121 void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { 122 ParsedFloat parsed = 123 ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); 124 EXPECT_EQ(parsed.end, nullptr); 125 } 126 127 TEST(ParseFloat, SimpleValue) { 128 // Test that various forms of floating point numbers all parse correctly. 129 ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); 130 ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); 131 ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); 132 ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); 133 ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); 134 ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); 135 ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); 136 ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); 137 138 ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); 139 ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); 140 ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); 141 ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, 142 -8); 143 ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); 144 ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, 145 -8); 146 ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); 147 ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); 148 149 // ExpectNumber does not attempt to drop trailing zeroes. 150 ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, 151 -5); 152 ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, 153 0x1234abcdef000, -20); 154 155 // Ensure non-matching characters after a number are ignored, even when they 156 // look like potentially matching characters. 157 ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); 158 ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); 159 ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); 160 ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); 161 ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, 162 -3); 163 ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); 164 ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); 165 ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); 166 ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); 167 168 ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, 169 -8); 170 ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, 171 -8); 172 ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, 173 -8); 174 ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); 175 ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, 176 0x1234abcdef, -8); 177 ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, 178 -8); 179 ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); 180 ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); 181 182 // Ensure we can read a full resolution mantissa without overflow. 183 ExpectNumber<10>("9999999999999999999$", chars_format::general, 184 9999999999999999999u, 0); 185 ExpectNumber<16>("fffffffffffffff$", chars_format::general, 186 0xfffffffffffffffu, 0); 187 188 // Check that zero is consistently read. 189 ExpectNumber<10>("0$", chars_format::general, 0, 0); 190 ExpectNumber<16>("0$", chars_format::general, 0, 0); 191 ExpectNumber<10>("000000000000000000000000000000000000000$", 192 chars_format::general, 0, 0); 193 ExpectNumber<16>("000000000000000000000000000000000000000$", 194 chars_format::general, 0, 0); 195 ExpectNumber<10>("0000000000000000000000.000000000000000000$", 196 chars_format::general, 0, 0); 197 ExpectNumber<16>("0000000000000000000000.000000000000000000$", 198 chars_format::general, 0, 0); 199 ExpectNumber<10>("0.00000000000000000000000000000000e123456$", 200 chars_format::general, 0, 0); 201 ExpectNumber<16>("0.00000000000000000000000000000000p123456$", 202 chars_format::general, 0, 0); 203 } 204 205 TEST(ParseFloat, LargeDecimalMantissa) { 206 // After 19 significant decimal digits in the mantissa, ParsedFloat will 207 // truncate additional digits. We need to test that: 208 // 1) the truncation to 19 digits happens 209 // 2) the returned exponent reflects the dropped significant digits 210 // 3) a correct literal_exponent is set 211 // 212 // If and only if a significant digit is found after 19 digits, then the 213 // entirety of the mantissa in case the exact value is needed to make a 214 // rounding decision. The [ and ] characters below denote where such a 215 // subregion was marked by by ParseFloat. They are not part of the input. 216 217 // Mark a capture group only if a dropped digit is significant (nonzero). 218 ExpectNumber<10>("100000000000000000000000000$", chars_format::general, 219 1000000000000000000, 220 /* adjusted exponent */ 8); 221 222 ExpectNumber<10>("123456789123456789100000000$", chars_format::general, 223 1234567891234567891, 224 /* adjusted exponent */ 8); 225 226 ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, 227 1234567891234567891, 228 /* adjusted exponent */ 8, 229 /* literal exponent */ 0); 230 231 ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, 232 1234567891234567891, 233 /* adjusted exponent */ 8, 234 /* literal exponent */ 0); 235 236 ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, 237 1234567891234567891, 238 /* adjusted exponent */ 8, 239 /* literal exponent */ 0); 240 241 // Leading zeroes should not count towards the 19 significant digit limit 242 ExpectNumber<10>("[00000000123456789123456789123456789]$", 243 chars_format::general, 1234567891234567891, 244 /* adjusted exponent */ 8, 245 /* literal exponent */ 0); 246 247 ExpectNumber<10>("00000000123456789123456789100000000$", 248 chars_format::general, 1234567891234567891, 249 /* adjusted exponent */ 8); 250 251 // Truncated digits after the decimal point should not cause a further 252 // exponent adjustment. 253 ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, 254 1234567891234567891, 105); 255 ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, 256 1234567891234567891, 257 /* adjusted exponent */ 105, 258 /* literal exponent */ 123); 259 260 // Ensure we truncate, and not round. (The from_chars algorithm we use 261 // depends on our guess missing low, if it misses, so we need the rounding 262 // error to be downward.) 263 ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, 264 1999999999999999999, 265 /* adjusted exponent */ 3, 266 /* literal exponent */ 0); 267 } 268 269 TEST(ParseFloat, LargeHexadecimalMantissa) { 270 // After 15 significant hex digits in the mantissa, ParsedFloat will treat 271 // additional digits as sticky, We need to test that: 272 // 1) The truncation to 15 digits happens 273 // 2) The returned exponent reflects the dropped significant digits 274 // 3) If a nonzero digit is dropped, the low bit of mantissa is set. 275 276 ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, 277 0x123456789abcdef, 60); 278 279 // Leading zeroes should not count towards the 15 significant digit limit 280 ExpectNumber<16>("000000123456789abcdef123456789abcdef$", 281 chars_format::general, 0x123456789abcdef, 60); 282 283 // Truncated digits after the radix point should not cause a further 284 // exponent adjustment. 285 ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, 286 0x123456789abcdef, 44); 287 ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", 288 chars_format::general, 0x123456789abcdef, 44); 289 290 // test sticky digit behavior. The low bit should be set iff any dropped 291 // digit is nonzero. 292 ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, 293 0x123456789abcdef, 60); 294 ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, 295 0x123456789abcdef, 60); 296 ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, 297 0x123456789abcdee, 60); 298 } 299 300 TEST(ParseFloat, ScientificVsFixed) { 301 // In fixed mode, an exponent is never matched (but the remainder of the 302 // number will be matched.) 303 ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); 304 ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); 305 ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); 306 ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); 307 308 // In scientific mode, numbers don't match *unless* they have an exponent. 309 ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); 310 ExpectFailedParse<10>("-123456.789$", chars_format::scientific); 311 ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, 312 -8); 313 ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); 314 } 315 316 TEST(ParseFloat, Infinity) { 317 ExpectFailedParse<10>("in", chars_format::general); 318 ExpectFailedParse<16>("in", chars_format::general); 319 ExpectFailedParse<10>("inx", chars_format::general); 320 ExpectFailedParse<16>("inx", chars_format::general); 321 ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); 322 ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); 323 ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); 324 ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); 325 ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); 326 ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); 327 } 328 329 TEST(ParseFloat, NaN) { 330 ExpectFailedParse<10>("na", chars_format::general); 331 ExpectFailedParse<16>("na", chars_format::general); 332 ExpectFailedParse<10>("nah", chars_format::general); 333 ExpectFailedParse<16>("nah", chars_format::general); 334 ExpectSpecial("nan$", chars_format::general, FloatType::kNan); 335 ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); 336 ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); 337 ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); 338 ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); 339 340 // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to 341 // appear after an NaN. Check that this is allowed, and that the correct 342 // characters are grouped. 343 // 344 // (The characters [ and ] in the pattern below delimit the expected matched 345 // subgroup; they are not part of the input passed to ParseFloat.) 346 ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); 347 ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); 348 ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); 349 ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); 350 ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); 351 // If the subgroup contains illegal characters, don't match it at all. 352 ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); 353 // Also cope with a missing close paren. 354 ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); 355 } 356 357 } // namespace