tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

decode_rust_punycode_test.cc (22066B)


      1 // Copyright 2024 The Abseil Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "absl/debugging/internal/decode_rust_punycode.h"
     16 
     17 #include <cstddef>
     18 #include <cstring>
     19 #include <string>
     20 
     21 #include "gmock/gmock.h"
     22 #include "gtest/gtest.h"
     23 #include "absl/base/config.h"
     24 
     25 namespace absl {
     26 ABSL_NAMESPACE_BEGIN
     27 namespace debugging_internal {
     28 namespace {
     29 
     30 using ::testing::AllOf;
     31 using ::testing::Eq;
     32 using ::testing::IsNull;
     33 using ::testing::Pointee;
     34 using ::testing::ResultOf;
     35 using ::testing::StrEq;
     36 
     37 class DecodeRustPunycodeTest : public ::testing::Test {
     38 protected:
     39  void FillBufferWithNonzeroBytes() {
     40    // The choice of nonzero value to fill with is arbitrary.  The point is just
     41    // to fail tests if DecodeRustPunycode forgets to write the final NUL
     42    // character.
     43    std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
     44  }
     45 
     46  DecodeRustPunycodeOptions WithAmpleSpace() {
     47    FillBufferWithNonzeroBytes();
     48 
     49    DecodeRustPunycodeOptions options;
     50    options.punycode_begin = punycode_.data();
     51    options.punycode_end = punycode_.data() + punycode_.size();
     52    options.out_begin = buffer_storage_;
     53    options.out_end = buffer_storage_ + sizeof(buffer_storage_);
     54    return options;
     55  }
     56 
     57  DecodeRustPunycodeOptions WithJustEnoughSpace() {
     58    FillBufferWithNonzeroBytes();
     59 
     60    const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
     61    DecodeRustPunycodeOptions options;
     62    options.punycode_begin = punycode_.data();
     63    options.punycode_end = punycode_.data() + punycode_.size();
     64    options.out_begin = buffer_storage_ + begin_offset;
     65    options.out_end = buffer_storage_ + sizeof(buffer_storage_);
     66    return options;
     67  }
     68 
     69  DecodeRustPunycodeOptions WithOneByteTooFew() {
     70    FillBufferWithNonzeroBytes();
     71 
     72    const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
     73    DecodeRustPunycodeOptions options;
     74    options.punycode_begin = punycode_.data();
     75    options.punycode_end = punycode_.data() + punycode_.size();
     76    options.out_begin = buffer_storage_ + begin_offset;
     77    options.out_end = buffer_storage_ + sizeof(buffer_storage_);
     78    return options;
     79  }
     80 
     81  // Matches a correct return value of DecodeRustPunycode when `golden` is the
     82  // expected plaintext output.
     83  auto PointsToTheNulAfter(const std::string& golden) {
     84    const size_t golden_size = golden.size();
     85    return AllOf(
     86        Pointee(Eq('\0')),
     87        ResultOf("preceding string body",
     88                 [golden_size](const char* p) { return p - golden_size; },
     89                 StrEq(golden)));
     90  }
     91 
     92  std::string punycode_;
     93  std::string plaintext_;
     94  char buffer_storage_[1024];
     95 };
     96 
     97 TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
     98  punycode_ = "";
     99  plaintext_ = "";
    100 
    101  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    102              PointsToTheNulAfter(plaintext_));
    103  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    104              PointsToTheNulAfter(plaintext_));
    105  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    106 }
    107 
    108 TEST_F(DecodeRustPunycodeTest,
    109       StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
    110  punycode_ = "foo_";
    111  plaintext_ = "foo";
    112 
    113  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    114              PointsToTheNulAfter(plaintext_));
    115  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    116              PointsToTheNulAfter(plaintext_));
    117  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    118 }
    119 
    120 TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
    121  punycode_ = "foo_bar_";
    122  plaintext_ = "foo_bar";
    123 
    124  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    125              PointsToTheNulAfter(plaintext_));
    126  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    127              PointsToTheNulAfter(plaintext_));
    128  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    129 }
    130 
    131 TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
    132  punycode_ = "_foo_";
    133  plaintext_ = "_foo";
    134 
    135  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    136              PointsToTheNulAfter(plaintext_));
    137  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    138              PointsToTheNulAfter(plaintext_));
    139  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    140 }
    141 
    142 TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
    143  punycode_ = "_foo";
    144 
    145  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    146 }
    147 
    148 TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
    149  punycode_ = std::string("foo\0bar_", 8);
    150 
    151  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    152 }
    153 
    154 TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
    155  punycode_ = "foo\007_";
    156  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    157 
    158  punycode_ = "foo-_";
    159  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    160 
    161  punycode_ = "foo;_";
    162  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    163 
    164  punycode_ = "foo\177_";
    165  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    166 }
    167 
    168 TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
    169  punycode_ = "\x80";
    170  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    171 
    172  punycode_ = "\x80_";
    173  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    174 
    175  punycode_ = "\xff";
    176  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    177 
    178  punycode_ = "\xff_";
    179  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    180 }
    181 
    182 TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
    183  // a encodes 0, so the output is the smallest non-ASCII code point standing
    184  // alone.  (U+0080 PAD is not an identifier character, but DecodeRustPunycode
    185  // does not check whether non-ASCII characters could belong to an identifier.)
    186  punycode_ = "a";
    187  plaintext_ = "\xc2\x80";
    188 
    189  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    190              PointsToTheNulAfter(plaintext_));
    191  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    192              PointsToTheNulAfter(plaintext_));
    193  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    194 }
    195 
    196 TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
    197  // Because bias = 72 for the first code point, any digit but a/A is nonfinal
    198  // in one of the first two bytes of a delta sequence.
    199  punycode_ = "b";
    200  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    201 
    202  punycode_ = "z";
    203  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    204 
    205  punycode_ = "0";
    206  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    207 
    208  punycode_ = "9";
    209  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    210 }
    211 
    212 TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
    213  punycode_ = "ba";
    214  plaintext_ = "\xc2\x81";
    215 
    216  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    217              PointsToTheNulAfter(plaintext_));
    218  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    219              PointsToTheNulAfter(plaintext_));
    220  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    221 }
    222 
    223 TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
    224  punycode_ = "ca";
    225  plaintext_ = "\xc2\x82";
    226  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    227              PointsToTheNulAfter(plaintext_));
    228 
    229  punycode_ = "za";
    230  plaintext_ = "\xc2\x99";
    231  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    232              PointsToTheNulAfter(plaintext_));
    233 
    234  punycode_ = "0a";
    235  plaintext_ = "\xc2\x9a";
    236  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    237              PointsToTheNulAfter(plaintext_));
    238 
    239  punycode_ = "1a";
    240  plaintext_ = "\xc2\x9b";
    241  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    242              PointsToTheNulAfter(plaintext_));
    243 
    244  punycode_ = "9a";
    245  plaintext_ = "£";  // Pound sign, U+00A3
    246  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    247              PointsToTheNulAfter(plaintext_));
    248 }
    249 
    250 TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
    251  punycode_ = "bb";
    252  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    253 
    254  punycode_ = "zz";
    255  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    256 
    257  punycode_ = "00";
    258  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    259 
    260  punycode_ = "99";
    261  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    262 }
    263 
    264 TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
    265  punycode_ = "bba";
    266  plaintext_ = "¤";  // U+00A4
    267  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    268              PointsToTheNulAfter(plaintext_));
    269 
    270  punycode_ = "cba";
    271  plaintext_ = "¥";  // U+00A5
    272  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    273              PointsToTheNulAfter(plaintext_));
    274 
    275  punycode_ = "zba";
    276  plaintext_ = "¼";  // U+00BC
    277  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    278              PointsToTheNulAfter(plaintext_));
    279 
    280  punycode_ = "0ba";
    281  plaintext_ = "½";  // U+00BD
    282  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    283              PointsToTheNulAfter(plaintext_));
    284 
    285  punycode_ = "1ba";
    286  plaintext_ = "¾";  // U+00BE
    287  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    288              PointsToTheNulAfter(plaintext_));
    289 
    290  punycode_ = "9ba";
    291  plaintext_ = "Æ";  // U+00C6
    292  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    293              PointsToTheNulAfter(plaintext_));
    294 }
    295 
    296 // Tests beyond this point use characters allowed in identifiers, so you can
    297 // prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
    298 // input and run it through another Rust demangler to verify that the
    299 // corresponding golden output is correct.
    300 
    301 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
    302  punycode_ = "0ca";
    303  plaintext_ = "à";
    304 
    305  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    306              PointsToTheNulAfter(plaintext_));
    307  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    308              PointsToTheNulAfter(plaintext_));
    309  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    310 }
    311 
    312 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
    313  punycode_ = "_la_mode_yya";
    314  plaintext_ = "à_la_mode";
    315 
    316  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    317              PointsToTheNulAfter(plaintext_));
    318  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    319              PointsToTheNulAfter(plaintext_));
    320  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    321 }
    322 
    323 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
    324  punycode_ = "verre__vin_m4a";
    325  plaintext_ = "verre_à_vin";
    326 
    327  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    328              PointsToTheNulAfter(plaintext_));
    329  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    330              PointsToTheNulAfter(plaintext_));
    331  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    332 }
    333 
    334 TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
    335  punycode_ = "belt_3na";
    336  plaintext_ = "beltà";
    337 
    338  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    339              PointsToTheNulAfter(plaintext_));
    340  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    341              PointsToTheNulAfter(plaintext_));
    342  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    343 }
    344 
    345 TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
    346  punycode_ = "0caaaa";
    347  plaintext_ = "àààà";
    348 
    349  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    350              PointsToTheNulAfter(plaintext_));
    351  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    352              PointsToTheNulAfter(plaintext_));
    353  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    354 }
    355 
    356 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
    357  punycode_ = "3camsuz";
    358  plaintext_ = "ãéïôù";
    359 
    360  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    361              PointsToTheNulAfter(plaintext_));
    362  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    363              PointsToTheNulAfter(plaintext_));
    364  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    365 }
    366 
    367 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
    368  punycode_ = "3caltsx";
    369  plaintext_ = "ùéôãï";
    370 
    371  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    372              PointsToTheNulAfter(plaintext_));
    373  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    374              PointsToTheNulAfter(plaintext_));
    375  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    376 }
    377 
    378 TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
    379  punycode_ = "fiq";
    380  plaintext_ = "中";
    381 
    382  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    383              PointsToTheNulAfter(plaintext_));
    384  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    385              PointsToTheNulAfter(plaintext_));
    386  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    387 }
    388 
    389 TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
    390  punycode_ = "fiqaaaa";
    391  plaintext_ = "中中中中中";
    392 
    393  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    394              PointsToTheNulAfter(plaintext_));
    395  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    396              PointsToTheNulAfter(plaintext_));
    397  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    398 }
    399 
    400 TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
    401  punycode_ = "fiq228c";
    402  plaintext_ = "中文";
    403 
    404  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    405              PointsToTheNulAfter(plaintext_));
    406  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    407              PointsToTheNulAfter(plaintext_));
    408  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    409 }
    410 
    411 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
    412  punycode_ = "fiq128c";
    413  plaintext_ = "文中";
    414 
    415  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    416              PointsToTheNulAfter(plaintext_));
    417  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    418              PointsToTheNulAfter(plaintext_));
    419  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    420 }
    421 
    422 TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
    423  punycode_ = "uy7h";
    424  plaintext_ = "🂻";
    425 
    426  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    427              PointsToTheNulAfter(plaintext_));
    428  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    429              PointsToTheNulAfter(plaintext_));
    430  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    431 }
    432 
    433 TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
    434  punycode_ = "jack__uh63d";
    435  plaintext_ = "jack_🂻";
    436 
    437  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    438              PointsToTheNulAfter(plaintext_));
    439  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    440              PointsToTheNulAfter(plaintext_));
    441  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    442 }
    443 
    444 TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
    445  punycode_ = "jack__of_hearts_ki37n";
    446  plaintext_ = "jack_🂻_of_hearts";
    447 
    448  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    449              PointsToTheNulAfter(plaintext_));
    450  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    451              PointsToTheNulAfter(plaintext_));
    452  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    453 }
    454 
    455 TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
    456  punycode_ = "_of_hearts_kz45i";
    457  plaintext_ = "🂻_of_hearts";
    458 
    459  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    460              PointsToTheNulAfter(plaintext_));
    461  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    462              PointsToTheNulAfter(plaintext_));
    463  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    464 }
    465 
    466 TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
    467  punycode_ = "uy7haaaa";
    468  plaintext_ = "🂻🂻🂻🂻🂻";
    469 
    470  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    471              PointsToTheNulAfter(plaintext_));
    472  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    473              PointsToTheNulAfter(plaintext_));
    474  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    475 }
    476 
    477 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
    478  punycode_ = "8x7hcjmf";
    479  plaintext_ = "🂦🂧🂪🂭🂮";
    480 
    481  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    482              PointsToTheNulAfter(plaintext_));
    483  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    484              PointsToTheNulAfter(plaintext_));
    485  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    486 }
    487 
    488 TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
    489  punycode_ = "8x7hcild";
    490  plaintext_ = "🂮🂦🂭🂪🂧";
    491 
    492  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    493              PointsToTheNulAfter(plaintext_));
    494  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    495              PointsToTheNulAfter(plaintext_));
    496  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    497 }
    498 
    499 TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
    500  punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
    501  plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧";
    502 
    503  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    504              PointsToTheNulAfter(plaintext_));
    505  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    506              PointsToTheNulAfter(plaintext_));
    507  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    508 }
    509 
    510 TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
    511  punycode_ = "123456789a";
    512 
    513  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
    514 }
    515 
    516 // Finally, we test on a few prose and poetry snippets as a defense in depth.
    517 // If our artificial short test inputs did not exercise a bug that is tickled by
    518 // patterns typical of real human writing, maybe real human writing will catch
    519 // that.
    520 //
    521 // These test inputs are extracted from texts old enough to be out of copyright
    522 // that probe a variety of ranges of code-point space.  All are longer than 32
    523 // code points, so they exercise the carrying of seminibbles from one uint64_t
    524 // to the next higher one in BoundedUtf8LengthSequence.
    525 
    526 // The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
    527 // few archaic two-byte letters interspersed.
    528 TEST_F(DecodeRustPunycodeTest, Beowulf) {
    529  punycode_ = "hwt_we_gardena_in_geardagum_"
    530              "eodcyninga_rym_gefrunon_"
    531              "hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
    532  plaintext_ = "hwæt_we_gardena_in_geardagum_"
    533               "þeodcyninga_þrym_gefrunon_"
    534               "hu_ða_æþelingas_ellen_fremedon";
    535 
    536  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    537              PointsToTheNulAfter(plaintext_));
    538  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    539              PointsToTheNulAfter(plaintext_));
    540  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    541 }
    542 
    543 // The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然
    544 // (Meng Haoran), exercising three-byte-character processing.
    545 TEST_F(DecodeRustPunycodeTest, MengHaoran) {
    546  punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
    547              "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
    548  plaintext_ = "故人具雞黍" "邀我至田家"
    549               "綠樹村邊合" "青山郭外斜"
    550               "開軒面場圃" "把酒話桑麻"
    551               "待到重陽日" "還來就菊花";
    552 
    553  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    554              PointsToTheNulAfter(plaintext_));
    555  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    556              PointsToTheNulAfter(plaintext_));
    557  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    558 }
    559 
    560 // A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
    561 // Japanese mixes two-byte and three-byte characters: a good workout for codecs.
    562 TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
    563  punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
    564              "em23do0op23cc2ff70mb6tae8aq759gja";
    565  plaintext_ = "瓜食めば"
    566               "子ども思ほゆ"
    567               "栗食めば"
    568               "まして偲はゆ"
    569               "何処より"
    570               "来りしものそ"
    571               "眼交に"
    572               "もとな懸りて"
    573               "安眠し寝さぬ";
    574 
    575  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    576              PointsToTheNulAfter(plaintext_));
    577  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    578              PointsToTheNulAfter(plaintext_));
    579  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    580 }
    581 
    582 // The first two lines of the Phoenician-language inscription on the sarcophagus
    583 // of Eshmunazar II of Sidon, 6th century BCE.  Phoenician and many other
    584 // archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000
    585 // through U+1FFFF) and thus exercise four-byte-character processing.
    586 TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
    587  punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
    588              "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
    589  plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓"
    590               "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊"
    591               "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
    592               "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
    593               "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊"
    594               "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕";
    595 
    596  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
    597              PointsToTheNulAfter(plaintext_));
    598  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
    599              PointsToTheNulAfter(plaintext_));
    600  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
    601 }
    602 
    603 }  // namespace
    604 }  // namespace debugging_internal
    605 ABSL_NAMESPACE_END
    606 }  // namespace absl