tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testParserAtom.cpp (14579B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "mozilla/Range.h"  // mozilla::Range
      6 #include "mozilla/Utf8.h"   // mozilla::Utf8Unit
      7 
      8 #include <string>  // std::char_traits
      9 #include <vector>  // std::vector
     10 
     11 #include "frontend/FrontendContext.h"  // AutoReportFrontendContext
     12 #include "frontend/ParserAtom.h"  // js::frontend::ParserAtomsTable, js::frontend::WellKnownParserAtoms
     13 #include "js/TypeDecls.h"  // JS::Latin1Char
     14 #include "jsapi-tests/tests.h"
     15 
     16 // Test empty strings behave consistently.
     17 BEGIN_TEST(testParserAtom_empty) {
     18  using js::frontend::ParserAtom;
     19  using js::frontend::ParserAtomsTable;
     20  using js::frontend::ParserAtomVector;
     21  using js::frontend::TaggedParserAtomIndex;
     22 
     23  js::AutoReportFrontendContext fc(cx);
     24  js::LifoAlloc alloc(512, js::MallocArena);
     25  ParserAtomsTable atomTable(alloc);
     26 
     27  const char ascii[] = {};
     28  const JS::Latin1Char latin1[] = {};
     29  const mozilla::Utf8Unit utf8[] = {};
     30  const char16_t char16[] = {};
     31 
     32  // Check that the well-known empty atom matches for different entry points.
     33  auto refIndex = TaggedParserAtomIndex::WellKnown::empty();
     34  CHECK(atomTable.internAscii(&fc, ascii, 0) == refIndex);
     35  CHECK(atomTable.internLatin1(&fc, latin1, 0) == refIndex);
     36  CHECK(atomTable.internUtf8(&fc, utf8, 0) == refIndex);
     37  CHECK(atomTable.internChar16(&fc, char16, 0) == refIndex);
     38 
     39  return true;
     40 }
     41 END_TEST(testParserAtom_empty)
     42 
     43 // Test length-1 fast-path is consistent across entry points for ASCII.
     44 BEGIN_TEST(testParserAtom_tiny1_ASCII) {
     45  using js::frontend::ParserAtom;
     46  using js::frontend::ParserAtomsTable;
     47  using js::frontend::ParserAtomVector;
     48  using js::frontend::WellKnownParserAtoms;
     49 
     50  js::AutoReportFrontendContext fc(cx);
     51  js::LifoAlloc alloc(512, js::MallocArena);
     52  ParserAtomsTable atomTable(alloc);
     53 
     54  char16_t a = 'a';
     55  const char ascii[] = {'a'};
     56  JS::Latin1Char latin1[] = {'a'};
     57  const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')};
     58  char16_t char16[] = {'a'};
     59 
     60  auto refIndex = WellKnownParserAtoms::getSingleton().lookupTinyIndex(&a, 1);
     61  CHECK(refIndex);
     62  CHECK(atomTable.internAscii(&fc, ascii, 1) == refIndex);
     63  CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
     64  CHECK(atomTable.internUtf8(&fc, utf8, 1) == refIndex);
     65  CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
     66 
     67  return true;
     68 }
     69 END_TEST(testParserAtom_tiny1_ASCII)
     70 
     71 // Test length-1 fast-path is consistent across entry points for non-ASCII.
     72 BEGIN_TEST(testParserAtom_tiny1_nonASCII) {
     73  using js::frontend::ParserAtom;
     74  using js::frontend::ParserAtomsTable;
     75  using js::frontend::ParserAtomVector;
     76  using js::frontend::WellKnownParserAtoms;
     77 
     78  js::AutoReportFrontendContext fc(cx);
     79  js::LifoAlloc alloc(512, js::MallocArena);
     80  ParserAtomsTable atomTable(alloc);
     81 
     82  {
     83    char16_t euro = 0x0080;
     84    JS::Latin1Char latin1[] = {0x80};
     85    const mozilla::Utf8Unit utf8[] = {
     86        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
     87        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
     88    char16_t char16[] = {0x0080};
     89 
     90    auto refIndex =
     91        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&euro, 1);
     92    CHECK(refIndex);
     93    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
     94    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
     95    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
     96  }
     97 
     98  {
     99    char16_t frac12 = 0x00BD;
    100    JS::Latin1Char latin1[] = {0xBD};
    101    const mozilla::Utf8Unit utf8[] = {
    102        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    103        mozilla::Utf8Unit(static_cast<unsigned char>(0xBD))};
    104    char16_t char16[] = {0x00BD};
    105 
    106    auto refIndex =
    107        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&frac12, 1);
    108    CHECK(refIndex);
    109    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    110    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    111    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
    112  }
    113 
    114  {
    115    char16_t iquest = 0x00BF;
    116    JS::Latin1Char latin1[] = {0xBF};
    117    const mozilla::Utf8Unit utf8[] = {
    118        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    119        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    120    char16_t char16[] = {0x00BF};
    121 
    122    auto refIndex =
    123        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&iquest, 1);
    124    CHECK(refIndex);
    125    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    126    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    127    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
    128  }
    129 
    130  {
    131    char16_t agrave = 0x00C0;
    132    JS::Latin1Char latin1[] = {0xC0};
    133    const mozilla::Utf8Unit utf8[] = {
    134        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    135        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    136    char16_t char16[] = {0x00C0};
    137 
    138    auto refIndex =
    139        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&agrave, 1);
    140    CHECK(refIndex);
    141    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    142    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    143    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
    144  }
    145 
    146  {
    147    char16_t ae = 0x00E6;
    148    JS::Latin1Char latin1[] = {0xE6};
    149    const mozilla::Utf8Unit utf8[] = {
    150        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    151        mozilla::Utf8Unit(static_cast<unsigned char>(0xA6))};
    152    char16_t char16[] = {0x00E6};
    153 
    154    auto refIndex =
    155        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&ae, 1);
    156    CHECK(refIndex);
    157    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    158    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    159    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
    160  }
    161 
    162  {
    163    char16_t yuml = 0x00FF;
    164    JS::Latin1Char latin1[] = {0xFF};
    165    const mozilla::Utf8Unit utf8[] = {
    166        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    167        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    168    char16_t char16[] = {0x00FF};
    169 
    170    auto refIndex =
    171        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&yuml, 1);
    172    CHECK(refIndex);
    173    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    174    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    175    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
    176  }
    177 
    178  return true;
    179 }
    180 END_TEST(testParserAtom_tiny1_nonASCII)
    181 
    182 // Test for tiny1 UTF-8 with valid/invalid code units.
    183 //
    184 // NOTE: Passing invalid UTF-8 to internUtf8 hits assertion failure, so
    185 //       test in the opposite way.
    186 //       lookupTinyIndexUTF8 is used inside internUtf8.
    187 BEGIN_TEST(testParserAtom_tiny1_invalidUTF8) {
    188  using js::frontend::ParserAtom;
    189  using js::frontend::ParserAtomsTable;
    190  using js::frontend::WellKnownParserAtoms;
    191 
    192  js::AutoReportFrontendContext fc(cx);
    193  js::LifoAlloc alloc(512, js::MallocArena);
    194  ParserAtomsTable atomTable(alloc);
    195 
    196  {
    197    const mozilla::Utf8Unit utf8[] = {
    198        mozilla::Utf8Unit(static_cast<unsigned char>(0xC1)),
    199        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    200 
    201    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    202  }
    203 
    204  {
    205    const mozilla::Utf8Unit utf8[] = {
    206        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    207        mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))};
    208 
    209    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    210  }
    211 
    212  {
    213    JS::Latin1Char latin1[] = {0x80};
    214    const mozilla::Utf8Unit utf8[] = {
    215        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    216        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    217 
    218    auto refIndex =
    219        WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2);
    220    CHECK(refIndex);
    221    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    222  }
    223 
    224  {
    225    JS::Latin1Char latin1[] = {0xBF};
    226    const mozilla::Utf8Unit utf8[] = {
    227        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    228        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    229 
    230    auto refIndex =
    231        WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2);
    232    CHECK(refIndex);
    233    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    234  }
    235 
    236  {
    237    const mozilla::Utf8Unit utf8[] = {
    238        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
    239        mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))};
    240 
    241    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    242  }
    243 
    244  {
    245    const mozilla::Utf8Unit utf8[] = {
    246        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    247        mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))};
    248 
    249    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    250  }
    251 
    252  {
    253    JS::Latin1Char latin1[] = {0xC0};
    254    const mozilla::Utf8Unit utf8[] = {
    255        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    256        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    257 
    258    auto refIndex =
    259        WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2);
    260    CHECK(refIndex);
    261    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    262  }
    263 
    264  {
    265    JS::Latin1Char latin1[] = {0xFF};
    266    const mozilla::Utf8Unit utf8[] = {
    267        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    268        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    269 
    270    auto refIndex =
    271        WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2);
    272    CHECK(refIndex);
    273    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    274  }
    275 
    276  {
    277    const mozilla::Utf8Unit utf8[] = {
    278        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
    279        mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))};
    280 
    281    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    282  }
    283 
    284  {
    285    const mozilla::Utf8Unit utf8[] = {
    286        mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)),
    287        mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))};
    288 
    289    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    290  }
    291 
    292  {
    293    const mozilla::Utf8Unit utf8[] = {
    294        mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)),
    295        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    296 
    297    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    298  }
    299 
    300  {
    301    const mozilla::Utf8Unit utf8[] = {
    302        mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)),
    303        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    304 
    305    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    306  }
    307 
    308  {
    309    const mozilla::Utf8Unit utf8[] = {
    310        mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)),
    311        mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))};
    312 
    313    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2));
    314  }
    315 
    316  return true;
    317 }
    318 END_TEST(testParserAtom_tiny1_invalidUTF8)
    319 
    320 // Test length-2 fast-path is consistent across entry points.
    321 BEGIN_TEST(testParserAtom_tiny2) {
    322  using js::frontend::ParserAtom;
    323  using js::frontend::ParserAtomsTable;
    324  using js::frontend::ParserAtomVector;
    325  using js::frontend::WellKnownParserAtoms;
    326 
    327  js::AutoReportFrontendContext fc(cx);
    328  js::LifoAlloc alloc(512, js::MallocArena);
    329  ParserAtomsTable atomTable(alloc);
    330 
    331  const char ascii[] = {'a', '0'};
    332  JS::Latin1Char latin1[] = {'a', '0'};
    333  const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'),
    334                                    mozilla::Utf8Unit('0')};
    335  char16_t char16[] = {'a', '0'};
    336 
    337  auto refIndex =
    338      WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 2);
    339  CHECK(refIndex);
    340  CHECK(atomTable.internAscii(&fc, ascii, 2) == refIndex);
    341  CHECK(atomTable.internLatin1(&fc, latin1, 2) == refIndex);
    342  CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    343  CHECK(atomTable.internChar16(&fc, char16, 2) == refIndex);
    344 
    345  // Note: If Latin1-Extended characters become supported, then UTF-8 behaviour
    346  // should be tested.
    347  char16_t ae0[] = {0x00E6, '0'};
    348  CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ae0, 2));
    349 
    350  return true;
    351 }
    352 END_TEST(testParserAtom_tiny2)
    353 
    354 // Test length-3 fast-path is consistent across entry points.
    355 BEGIN_TEST(testParserAtom_int) {
    356  using js::frontend::ParserAtom;
    357  using js::frontend::ParserAtomsTable;
    358  using js::frontend::ParserAtomVector;
    359  using js::frontend::WellKnownParserAtoms;
    360 
    361  js::AutoReportFrontendContext fc(cx);
    362  js::LifoAlloc alloc(512, js::MallocArena);
    363  ParserAtomsTable atomTable(alloc);
    364 
    365  {
    366    const char ascii[] = {'1', '0', '0'};
    367    JS::Latin1Char latin1[] = {'1', '0', '0'};
    368    const mozilla::Utf8Unit utf8[] = {
    369        mozilla::Utf8Unit('1'), mozilla::Utf8Unit('0'), mozilla::Utf8Unit('0')};
    370    char16_t char16[] = {'1', '0', '0'};
    371 
    372    auto refIndex =
    373        WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3);
    374    CHECK(refIndex);
    375    CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex);
    376    CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex);
    377    CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex);
    378    CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex);
    379  }
    380 
    381  {
    382    const char ascii[] = {'2', '5', '5'};
    383    JS::Latin1Char latin1[] = {'2', '5', '5'};
    384    const mozilla::Utf8Unit utf8[] = {
    385        mozilla::Utf8Unit('2'), mozilla::Utf8Unit('5'), mozilla::Utf8Unit('5')};
    386    char16_t char16[] = {'2', '5', '5'};
    387 
    388    auto refIndex =
    389        WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3);
    390    CHECK(refIndex);
    391    CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex);
    392    CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex);
    393    CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex);
    394    CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex);
    395  }
    396 
    397  {
    398    const char ascii[] = {'0', '9', '9'};
    399 
    400    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    401  }
    402 
    403  {
    404    const char ascii[] = {'0', 'F', 'F'};
    405 
    406    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    407  }
    408 
    409  {
    410    const char ascii[] = {'1', '0', 'A'};
    411 
    412    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    413  }
    414 
    415  {
    416    const char ascii[] = {'1', '0', 'a'};
    417 
    418    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    419  }
    420 
    421  {
    422    const char ascii[] = {'2', '5', '6'};
    423 
    424    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    425  }
    426 
    427  {
    428    const char ascii[] = {'3', '0', '0'};
    429 
    430    CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3));
    431  }
    432 
    433  return true;
    434 }
    435 END_TEST(testParserAtom_int)
    436 
    437 // "€"    U+0080
    438 // "½"    U+00BD
    439 // "¿"    U+00BF
    440 // "À"    U+00C0
    441 // "æ"    U+00E6
    442 // "ÿ"    U+00FF
    443 // "π"    U+03C0
    444 // "🍕"   U+1F355