testParserAtom.cpp (14579B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mozilla/Range.h" // mozilla::Range 6 #include "mozilla/Utf8.h" // mozilla::Utf8Unit 7 8 #include <string> // std::char_traits 9 #include <vector> // std::vector 10 11 #include "frontend/FrontendContext.h" // AutoReportFrontendContext 12 #include "frontend/ParserAtom.h" // js::frontend::ParserAtomsTable, js::frontend::WellKnownParserAtoms 13 #include "js/TypeDecls.h" // JS::Latin1Char 14 #include "jsapi-tests/tests.h" 15 16 // Test empty strings behave consistently. 17 BEGIN_TEST(testParserAtom_empty) { 18 using js::frontend::ParserAtom; 19 using js::frontend::ParserAtomsTable; 20 using js::frontend::ParserAtomVector; 21 using js::frontend::TaggedParserAtomIndex; 22 23 js::AutoReportFrontendContext fc(cx); 24 js::LifoAlloc alloc(512, js::MallocArena); 25 ParserAtomsTable atomTable(alloc); 26 27 const char ascii[] = {}; 28 const JS::Latin1Char latin1[] = {}; 29 const mozilla::Utf8Unit utf8[] = {}; 30 const char16_t char16[] = {}; 31 32 // Check that the well-known empty atom matches for different entry points. 33 auto refIndex = TaggedParserAtomIndex::WellKnown::empty(); 34 CHECK(atomTable.internAscii(&fc, ascii, 0) == refIndex); 35 CHECK(atomTable.internLatin1(&fc, latin1, 0) == refIndex); 36 CHECK(atomTable.internUtf8(&fc, utf8, 0) == refIndex); 37 CHECK(atomTable.internChar16(&fc, char16, 0) == refIndex); 38 39 return true; 40 } 41 END_TEST(testParserAtom_empty) 42 43 // Test length-1 fast-path is consistent across entry points for ASCII. 44 BEGIN_TEST(testParserAtom_tiny1_ASCII) { 45 using js::frontend::ParserAtom; 46 using js::frontend::ParserAtomsTable; 47 using js::frontend::ParserAtomVector; 48 using js::frontend::WellKnownParserAtoms; 49 50 js::AutoReportFrontendContext fc(cx); 51 js::LifoAlloc alloc(512, js::MallocArena); 52 ParserAtomsTable atomTable(alloc); 53 54 char16_t a = 'a'; 55 const char ascii[] = {'a'}; 56 JS::Latin1Char latin1[] = {'a'}; 57 const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')}; 58 char16_t char16[] = {'a'}; 59 60 auto refIndex = WellKnownParserAtoms::getSingleton().lookupTinyIndex(&a, 1); 61 CHECK(refIndex); 62 CHECK(atomTable.internAscii(&fc, ascii, 1) == refIndex); 63 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 64 CHECK(atomTable.internUtf8(&fc, utf8, 1) == refIndex); 65 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 66 67 return true; 68 } 69 END_TEST(testParserAtom_tiny1_ASCII) 70 71 // Test length-1 fast-path is consistent across entry points for non-ASCII. 72 BEGIN_TEST(testParserAtom_tiny1_nonASCII) { 73 using js::frontend::ParserAtom; 74 using js::frontend::ParserAtomsTable; 75 using js::frontend::ParserAtomVector; 76 using js::frontend::WellKnownParserAtoms; 77 78 js::AutoReportFrontendContext fc(cx); 79 js::LifoAlloc alloc(512, js::MallocArena); 80 ParserAtomsTable atomTable(alloc); 81 82 { 83 char16_t euro = 0x0080; 84 JS::Latin1Char latin1[] = {0x80}; 85 const mozilla::Utf8Unit utf8[] = { 86 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 87 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 88 char16_t char16[] = {0x0080}; 89 90 auto refIndex = 91 WellKnownParserAtoms::getSingleton().lookupTinyIndex(&euro, 1); 92 CHECK(refIndex); 93 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 94 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 95 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 96 } 97 98 { 99 char16_t frac12 = 0x00BD; 100 JS::Latin1Char latin1[] = {0xBD}; 101 const mozilla::Utf8Unit utf8[] = { 102 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 103 mozilla::Utf8Unit(static_cast<unsigned char>(0xBD))}; 104 char16_t char16[] = {0x00BD}; 105 106 auto refIndex = 107 WellKnownParserAtoms::getSingleton().lookupTinyIndex(½, 1); 108 CHECK(refIndex); 109 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 110 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 111 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 112 } 113 114 { 115 char16_t iquest = 0x00BF; 116 JS::Latin1Char latin1[] = {0xBF}; 117 const mozilla::Utf8Unit utf8[] = { 118 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 119 mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; 120 char16_t char16[] = {0x00BF}; 121 122 auto refIndex = 123 WellKnownParserAtoms::getSingleton().lookupTinyIndex(¿, 1); 124 CHECK(refIndex); 125 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 126 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 127 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 128 } 129 130 { 131 char16_t agrave = 0x00C0; 132 JS::Latin1Char latin1[] = {0xC0}; 133 const mozilla::Utf8Unit utf8[] = { 134 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 135 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 136 char16_t char16[] = {0x00C0}; 137 138 auto refIndex = 139 WellKnownParserAtoms::getSingleton().lookupTinyIndex(à, 1); 140 CHECK(refIndex); 141 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 142 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 143 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 144 } 145 146 { 147 char16_t ae = 0x00E6; 148 JS::Latin1Char latin1[] = {0xE6}; 149 const mozilla::Utf8Unit utf8[] = { 150 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 151 mozilla::Utf8Unit(static_cast<unsigned char>(0xA6))}; 152 char16_t char16[] = {0x00E6}; 153 154 auto refIndex = 155 WellKnownParserAtoms::getSingleton().lookupTinyIndex(&ae, 1); 156 CHECK(refIndex); 157 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 158 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 159 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 160 } 161 162 { 163 char16_t yuml = 0x00FF; 164 JS::Latin1Char latin1[] = {0xFF}; 165 const mozilla::Utf8Unit utf8[] = { 166 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 167 mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; 168 char16_t char16[] = {0x00FF}; 169 170 auto refIndex = 171 WellKnownParserAtoms::getSingleton().lookupTinyIndex(ÿ, 1); 172 CHECK(refIndex); 173 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 174 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 175 CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); 176 } 177 178 return true; 179 } 180 END_TEST(testParserAtom_tiny1_nonASCII) 181 182 // Test for tiny1 UTF-8 with valid/invalid code units. 183 // 184 // NOTE: Passing invalid UTF-8 to internUtf8 hits assertion failure, so 185 // test in the opposite way. 186 // lookupTinyIndexUTF8 is used inside internUtf8. 187 BEGIN_TEST(testParserAtom_tiny1_invalidUTF8) { 188 using js::frontend::ParserAtom; 189 using js::frontend::ParserAtomsTable; 190 using js::frontend::WellKnownParserAtoms; 191 192 js::AutoReportFrontendContext fc(cx); 193 js::LifoAlloc alloc(512, js::MallocArena); 194 ParserAtomsTable atomTable(alloc); 195 196 { 197 const mozilla::Utf8Unit utf8[] = { 198 mozilla::Utf8Unit(static_cast<unsigned char>(0xC1)), 199 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 200 201 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 202 } 203 204 { 205 const mozilla::Utf8Unit utf8[] = { 206 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 207 mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; 208 209 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 210 } 211 212 { 213 JS::Latin1Char latin1[] = {0x80}; 214 const mozilla::Utf8Unit utf8[] = { 215 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 216 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 217 218 auto refIndex = 219 WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); 220 CHECK(refIndex); 221 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 222 } 223 224 { 225 JS::Latin1Char latin1[] = {0xBF}; 226 const mozilla::Utf8Unit utf8[] = { 227 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 228 mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; 229 230 auto refIndex = 231 WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); 232 CHECK(refIndex); 233 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 234 } 235 236 { 237 const mozilla::Utf8Unit utf8[] = { 238 mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), 239 mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; 240 241 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 242 } 243 244 { 245 const mozilla::Utf8Unit utf8[] = { 246 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 247 mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; 248 249 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 250 } 251 252 { 253 JS::Latin1Char latin1[] = {0xC0}; 254 const mozilla::Utf8Unit utf8[] = { 255 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 256 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 257 258 auto refIndex = 259 WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); 260 CHECK(refIndex); 261 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 262 } 263 264 { 265 JS::Latin1Char latin1[] = {0xFF}; 266 const mozilla::Utf8Unit utf8[] = { 267 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 268 mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; 269 270 auto refIndex = 271 WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); 272 CHECK(refIndex); 273 CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); 274 } 275 276 { 277 const mozilla::Utf8Unit utf8[] = { 278 mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), 279 mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; 280 281 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 282 } 283 284 { 285 const mozilla::Utf8Unit utf8[] = { 286 mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), 287 mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; 288 289 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 290 } 291 292 { 293 const mozilla::Utf8Unit utf8[] = { 294 mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), 295 mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; 296 297 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 298 } 299 300 { 301 const mozilla::Utf8Unit utf8[] = { 302 mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), 303 mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; 304 305 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 306 } 307 308 { 309 const mozilla::Utf8Unit utf8[] = { 310 mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), 311 mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; 312 313 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); 314 } 315 316 return true; 317 } 318 END_TEST(testParserAtom_tiny1_invalidUTF8) 319 320 // Test length-2 fast-path is consistent across entry points. 321 BEGIN_TEST(testParserAtom_tiny2) { 322 using js::frontend::ParserAtom; 323 using js::frontend::ParserAtomsTable; 324 using js::frontend::ParserAtomVector; 325 using js::frontend::WellKnownParserAtoms; 326 327 js::AutoReportFrontendContext fc(cx); 328 js::LifoAlloc alloc(512, js::MallocArena); 329 ParserAtomsTable atomTable(alloc); 330 331 const char ascii[] = {'a', '0'}; 332 JS::Latin1Char latin1[] = {'a', '0'}; 333 const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'), 334 mozilla::Utf8Unit('0')}; 335 char16_t char16[] = {'a', '0'}; 336 337 auto refIndex = 338 WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 2); 339 CHECK(refIndex); 340 CHECK(atomTable.internAscii(&fc, ascii, 2) == refIndex); 341 CHECK(atomTable.internLatin1(&fc, latin1, 2) == refIndex); 342 CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); 343 CHECK(atomTable.internChar16(&fc, char16, 2) == refIndex); 344 345 // Note: If Latin1-Extended characters become supported, then UTF-8 behaviour 346 // should be tested. 347 char16_t ae0[] = {0x00E6, '0'}; 348 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ae0, 2)); 349 350 return true; 351 } 352 END_TEST(testParserAtom_tiny2) 353 354 // Test length-3 fast-path is consistent across entry points. 355 BEGIN_TEST(testParserAtom_int) { 356 using js::frontend::ParserAtom; 357 using js::frontend::ParserAtomsTable; 358 using js::frontend::ParserAtomVector; 359 using js::frontend::WellKnownParserAtoms; 360 361 js::AutoReportFrontendContext fc(cx); 362 js::LifoAlloc alloc(512, js::MallocArena); 363 ParserAtomsTable atomTable(alloc); 364 365 { 366 const char ascii[] = {'1', '0', '0'}; 367 JS::Latin1Char latin1[] = {'1', '0', '0'}; 368 const mozilla::Utf8Unit utf8[] = { 369 mozilla::Utf8Unit('1'), mozilla::Utf8Unit('0'), mozilla::Utf8Unit('0')}; 370 char16_t char16[] = {'1', '0', '0'}; 371 372 auto refIndex = 373 WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3); 374 CHECK(refIndex); 375 CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex); 376 CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex); 377 CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex); 378 CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex); 379 } 380 381 { 382 const char ascii[] = {'2', '5', '5'}; 383 JS::Latin1Char latin1[] = {'2', '5', '5'}; 384 const mozilla::Utf8Unit utf8[] = { 385 mozilla::Utf8Unit('2'), mozilla::Utf8Unit('5'), mozilla::Utf8Unit('5')}; 386 char16_t char16[] = {'2', '5', '5'}; 387 388 auto refIndex = 389 WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3); 390 CHECK(refIndex); 391 CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex); 392 CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex); 393 CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex); 394 CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex); 395 } 396 397 { 398 const char ascii[] = {'0', '9', '9'}; 399 400 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 401 } 402 403 { 404 const char ascii[] = {'0', 'F', 'F'}; 405 406 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 407 } 408 409 { 410 const char ascii[] = {'1', '0', 'A'}; 411 412 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 413 } 414 415 { 416 const char ascii[] = {'1', '0', 'a'}; 417 418 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 419 } 420 421 { 422 const char ascii[] = {'2', '5', '6'}; 423 424 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 425 } 426 427 { 428 const char ascii[] = {'3', '0', '0'}; 429 430 CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); 431 } 432 433 return true; 434 } 435 END_TEST(testParserAtom_int) 436 437 // "€" U+0080 438 // "½" U+00BD 439 // "¿" U+00BF 440 // "À" U+00C0 441 // "æ" U+00E6 442 // "ÿ" U+00FF 443 // "π" U+03C0 444 // "🍕" U+1F355