// decode_rust_punycode_test.cc (22066B)
1 // Copyright 2024 The Abseil Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/debugging/internal/decode_rust_punycode.h" 16 17 #include <cstddef> 18 #include <cstring> 19 #include <string> 20 21 #include "gmock/gmock.h" 22 #include "gtest/gtest.h" 23 #include "absl/base/config.h" 24 25 namespace absl { 26 ABSL_NAMESPACE_BEGIN 27 namespace debugging_internal { 28 namespace { 29 30 using ::testing::AllOf; 31 using ::testing::Eq; 32 using ::testing::IsNull; 33 using ::testing::Pointee; 34 using ::testing::ResultOf; 35 using ::testing::StrEq; 36 37 class DecodeRustPunycodeTest : public ::testing::Test { 38 protected: 39 void FillBufferWithNonzeroBytes() { 40 // The choice of nonzero value to fill with is arbitrary. The point is just 41 // to fail tests if DecodeRustPunycode forgets to write the final NUL 42 // character. 
43 std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_)); 44 } 45 46 DecodeRustPunycodeOptions WithAmpleSpace() { 47 FillBufferWithNonzeroBytes(); 48 49 DecodeRustPunycodeOptions options; 50 options.punycode_begin = punycode_.data(); 51 options.punycode_end = punycode_.data() + punycode_.size(); 52 options.out_begin = buffer_storage_; 53 options.out_end = buffer_storage_ + sizeof(buffer_storage_); 54 return options; 55 } 56 57 DecodeRustPunycodeOptions WithJustEnoughSpace() { 58 FillBufferWithNonzeroBytes(); 59 60 const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1; 61 DecodeRustPunycodeOptions options; 62 options.punycode_begin = punycode_.data(); 63 options.punycode_end = punycode_.data() + punycode_.size(); 64 options.out_begin = buffer_storage_ + begin_offset; 65 options.out_end = buffer_storage_ + sizeof(buffer_storage_); 66 return options; 67 } 68 69 DecodeRustPunycodeOptions WithOneByteTooFew() { 70 FillBufferWithNonzeroBytes(); 71 72 const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size(); 73 DecodeRustPunycodeOptions options; 74 options.punycode_begin = punycode_.data(); 75 options.punycode_end = punycode_.data() + punycode_.size(); 76 options.out_begin = buffer_storage_ + begin_offset; 77 options.out_end = buffer_storage_ + sizeof(buffer_storage_); 78 return options; 79 } 80 81 // Matches a correct return value of DecodeRustPunycode when `golden` is the 82 // expected plaintext output. 
83 auto PointsToTheNulAfter(const std::string& golden) { 84 const size_t golden_size = golden.size(); 85 return AllOf( 86 Pointee(Eq('\0')), 87 ResultOf("preceding string body", 88 [golden_size](const char* p) { return p - golden_size; }, 89 StrEq(golden))); 90 } 91 92 std::string punycode_; 93 std::string plaintext_; 94 char buffer_storage_[1024]; 95 }; 96 97 TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) { 98 punycode_ = ""; 99 plaintext_ = ""; 100 101 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 102 PointsToTheNulAfter(plaintext_)); 103 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 104 PointsToTheNulAfter(plaintext_)); 105 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 106 } 107 108 TEST_F(DecodeRustPunycodeTest, 109 StripsTheTrailingDelimiterFromAPureRunOfBasicChars) { 110 punycode_ = "foo_"; 111 plaintext_ = "foo"; 112 113 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 114 PointsToTheNulAfter(plaintext_)); 115 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 116 PointsToTheNulAfter(plaintext_)); 117 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 118 } 119 120 TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) { 121 punycode_ = "foo_bar_"; 122 plaintext_ = "foo_bar"; 123 124 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 125 PointsToTheNulAfter(plaintext_)); 126 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 127 PointsToTheNulAfter(plaintext_)); 128 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 129 } 130 131 TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) { 132 punycode_ = "_foo_"; 133 plaintext_ = "_foo"; 134 135 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 136 PointsToTheNulAfter(plaintext_)); 137 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 138 PointsToTheNulAfter(plaintext_)); 139 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 140 } 141 142 TEST_F(DecodeRustPunycodeTest, 
RejectsALeadingUnderscoreDelimiter) { 143 punycode_ = "_foo"; 144 145 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 146 } 147 148 TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) { 149 punycode_ = std::string("foo\0bar_", 8); 150 151 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 152 } 153 154 TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) { 155 punycode_ = "foo\007_"; 156 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 157 158 punycode_ = "foo-_"; 159 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 160 161 punycode_ = "foo;_"; 162 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 163 164 punycode_ = "foo\177_"; 165 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 166 } 167 168 TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) { 169 punycode_ = "\x80"; 170 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 171 172 punycode_ = "\x80_"; 173 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 174 175 punycode_ = "\xff"; 176 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 177 178 punycode_ = "\xff_"; 179 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 180 } 181 182 TEST_F(DecodeRustPunycodeTest, RecognizesU0080) { 183 // a encodes 0, so the output is the smallest non-ASCII code point standing 184 // alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode 185 // does not check whether non-ASCII characters could belong to an identifier.) 
186 punycode_ = "a"; 187 plaintext_ = "\xc2\x80"; 188 189 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 190 PointsToTheNulAfter(plaintext_)); 191 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 192 PointsToTheNulAfter(plaintext_)); 193 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 194 } 195 196 TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) { 197 // Because bias = 72 for the first code point, any digit but a/A is nonfinal 198 // in one of the first two bytes of a delta sequence. 199 punycode_ = "b"; 200 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 201 202 punycode_ = "z"; 203 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 204 205 punycode_ = "0"; 206 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 207 208 punycode_ = "9"; 209 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 210 } 211 212 TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) { 213 punycode_ = "ba"; 214 plaintext_ = "\xc2\x81"; 215 216 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 217 PointsToTheNulAfter(plaintext_)); 218 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 219 PointsToTheNulAfter(plaintext_)); 220 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 221 } 222 223 TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) { 224 punycode_ = "ca"; 225 plaintext_ = "\xc2\x82"; 226 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 227 PointsToTheNulAfter(plaintext_)); 228 229 punycode_ = "za"; 230 plaintext_ = "\xc2\x99"; 231 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 232 PointsToTheNulAfter(plaintext_)); 233 234 punycode_ = "0a"; 235 plaintext_ = "\xc2\x9a"; 236 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 237 PointsToTheNulAfter(plaintext_)); 238 239 punycode_ = "1a"; 240 plaintext_ = "\xc2\x9b"; 241 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 242 PointsToTheNulAfter(plaintext_)); 243 244 punycode_ = "9a"; 245 
plaintext_ = "£"; // Pound sign, U+00A3 246 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 247 PointsToTheNulAfter(plaintext_)); 248 } 249 250 TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) { 251 punycode_ = "bb"; 252 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 253 254 punycode_ = "zz"; 255 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 256 257 punycode_ = "00"; 258 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 259 260 punycode_ = "99"; 261 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 262 } 263 264 TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) { 265 punycode_ = "bba"; 266 plaintext_ = "¤"; // U+00A4 267 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 268 PointsToTheNulAfter(plaintext_)); 269 270 punycode_ = "cba"; 271 plaintext_ = "¥"; // U+00A5 272 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 273 PointsToTheNulAfter(plaintext_)); 274 275 punycode_ = "zba"; 276 plaintext_ = "¼"; // U+00BC 277 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 278 PointsToTheNulAfter(plaintext_)); 279 280 punycode_ = "0ba"; 281 plaintext_ = "½"; // U+00BD 282 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 283 PointsToTheNulAfter(plaintext_)); 284 285 punycode_ = "1ba"; 286 plaintext_ = "¾"; // U+00BE 287 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 288 PointsToTheNulAfter(plaintext_)); 289 290 punycode_ = "9ba"; 291 plaintext_ = "Æ"; // U+00C6 292 EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 293 PointsToTheNulAfter(plaintext_)); 294 } 295 296 // Tests beyond this point use characters allowed in identifiers, so you can 297 // prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test 298 // input and run it through another Rust demangler to verify that the 299 // corresponding golden output is correct. 
300 301 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) { 302 punycode_ = "0ca"; 303 plaintext_ = "à"; 304 305 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 306 PointsToTheNulAfter(plaintext_)); 307 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 308 PointsToTheNulAfter(plaintext_)); 309 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 310 } 311 312 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) { 313 punycode_ = "_la_mode_yya"; 314 plaintext_ = "à_la_mode"; 315 316 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 317 PointsToTheNulAfter(plaintext_)); 318 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 319 PointsToTheNulAfter(plaintext_)); 320 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 321 } 322 323 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) { 324 punycode_ = "verre__vin_m4a"; 325 plaintext_ = "verre_à_vin"; 326 327 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 328 PointsToTheNulAfter(plaintext_)); 329 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 330 PointsToTheNulAfter(plaintext_)); 331 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 332 } 333 334 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) { 335 punycode_ = "belt_3na"; 336 plaintext_ = "beltà"; 337 338 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 339 PointsToTheNulAfter(plaintext_)); 340 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 341 PointsToTheNulAfter(plaintext_)); 342 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 343 } 344 345 TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) { 346 punycode_ = "0caaaa"; 347 plaintext_ = "àààà"; 348 349 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 350 PointsToTheNulAfter(plaintext_)); 351 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 352 PointsToTheNulAfter(plaintext_)); 353 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 354 } 355 356 
// Each test in this group decodes one input three times: into the whole
// buffer, into a range with exactly enough room for the text plus its NUL,
// and into a range one byte shorter, where decoding must fail.

TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
  punycode_ = "3camsuz";
  plaintext_ = "ãéïôù";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
  punycode_ = "3caltsx";
  plaintext_ = "ùéôãï";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
  punycode_ = "fiq";
  plaintext_ = "中";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
  punycode_ = "fiqaaaa";
  plaintext_ = "中中中中中";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
  punycode_ = "fiq228c";
  plaintext_ = "中文";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
  punycode_ = "fiq128c";
  plaintext_ = "文中";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
  punycode_ = "uy7h";
  plaintext_ = "🂻";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

// In the next three tests "before", "amid" and "after" describe where the
// four-byte character sits relative to the basic (ASCII) characters in the
// decoded text.

TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
  // The decoded text starts with the four-byte character.
  punycode_ = "_of_hearts_kz45i";
  plaintext_ = "🂻_of_hearts";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
  punycode_ = "jack__of_hearts_ki37n";
  plaintext_ = "jack_🂻_of_hearts";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
  // The decoded text ends with the four-byte character.
  punycode_ = "jack__uh63d";
  plaintext_ = "jack_🂻";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}

TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) { 467 punycode_ = "uy7haaaa"; 468 plaintext_ = "🂻🂻🂻🂻🂻"; 469 470 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 471 PointsToTheNulAfter(plaintext_)); 472 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 473 PointsToTheNulAfter(plaintext_)); 474 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 475 } 476 477 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) { 478 punycode_ = "8x7hcjmf"; 479 plaintext_ = "🂦🂧🂪🂭🂮"; 480 481 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 482 PointsToTheNulAfter(plaintext_)); 483 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 484 PointsToTheNulAfter(plaintext_)); 485 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 486 } 487 488 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) { 489 punycode_ = "8x7hcild"; 490 plaintext_ = "🂮🂦🂭🂪🂧"; 491 492 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 493 PointsToTheNulAfter(plaintext_)); 494 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 495 PointsToTheNulAfter(plaintext_)); 496 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 497 } 498 499 TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) { 500 punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak"; 501 plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧"; 502 503 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 504 PointsToTheNulAfter(plaintext_)); 505 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 506 PointsToTheNulAfter(plaintext_)); 507 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 508 } 509 510 TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) { 511 punycode_ = "123456789a"; 512 513 EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); 514 } 515 516 // Finally, we test on a few prose and poetry snippets as a defense in depth. 
517 // If our artificial short test inputs did not exercise a bug that is tickled by 518 // patterns typical of real human writing, maybe real human writing will catch 519 // that. 520 // 521 // These test inputs are extracted from texts old enough to be out of copyright 522 // that probe a variety of ranges of code-point space. All are longer than 32 523 // code points, so they exercise the carrying of seminibbles from one uint64_t 524 // to the next higher one in BoundedUtf8LengthSequence. 525 526 // The first three lines of the Old English epic _Beowulf_, mostly ASCII with a 527 // few archaic two-byte letters interspersed. 528 TEST_F(DecodeRustPunycodeTest, Beowulf) { 529 punycode_ = "hwt_we_gardena_in_geardagum_" 530 "eodcyninga_rym_gefrunon_" 531 "hu_a_elingas_ellen_fremedon_hxg9c70do9alau"; 532 plaintext_ = "hwæt_we_gardena_in_geardagum_" 533 "þeodcyninga_þrym_gefrunon_" 534 "hu_ða_æþelingas_ellen_fremedon"; 535 536 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 537 PointsToTheNulAfter(plaintext_)); 538 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 539 PointsToTheNulAfter(plaintext_)); 540 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 541 } 542 543 // The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然 544 // (Meng Haoran), exercising three-byte-character processing. 545 TEST_F(DecodeRustPunycodeTest, MengHaoran) { 546 punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6" 547 "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta"; 548 plaintext_ = "故人具雞黍" "邀我至田家" 549 "綠樹村邊合" "青山郭外斜" 550 "開軒面場圃" "把酒話桑麻" 551 "待到重陽日" "還來就菊花"; 552 553 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 554 PointsToTheNulAfter(plaintext_)); 555 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 556 PointsToTheNulAfter(plaintext_)); 557 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 558 } 559 560 // A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura). 
561 // Japanese mixes two-byte and three-byte characters: a good workout for codecs. 562 TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) { 563 punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0" 564 "em23do0op23cc2ff70mb6tae8aq759gja"; 565 plaintext_ = "瓜食めば" 566 "子ども思ほゆ" 567 "栗食めば" 568 "まして偲はゆ" 569 "何処より" 570 "来りしものそ" 571 "眼交に" 572 "もとな懸りて" 573 "安眠し寝さぬ"; 574 575 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 576 PointsToTheNulAfter(plaintext_)); 577 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 578 PointsToTheNulAfter(plaintext_)); 579 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 580 } 581 582 // The first two lines of the Phoenician-language inscription on the sarcophagus 583 // of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other 584 // archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000 585 // through U+1FFFF) and thus exercise four-byte-character processing. 586 TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) { 587 punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee" 588 "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g"; 589 plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓" 590 "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊" 591 "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌" 592 "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌" 593 "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊" 594 "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕"; 595 596 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), 597 PointsToTheNulAfter(plaintext_)); 598 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), 599 PointsToTheNulAfter(plaintext_)); 600 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); 601 } 602 603 } // namespace 604 } // namespace debugging_internal 605 ABSL_NAMESPACE_END 606 } // namespace absl