RegExp.cpp (88572B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "builtin/RegExp.h"

#include "mozilla/Casting.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/TextUtils.h"

#include "jsapi.h"

#include "frontend/FrontendContext.h"  // AutoReportFrontendContext
#include "frontend/TokenStream.h"
#include "irregexp/RegExpAPI.h"
#include "jit/InlinableNatives.h"
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_NEWREGEXP_FLAGGED
#include "js/PropertySpec.h"
#include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
#include "util/StringBuilder.h"
#include "vm/EqualityOperations.h"
#include "vm/Interpreter.h"
#include "vm/JSContext.h"
#include "vm/RegExpObject.h"
#include "vm/RegExpStatics.h"
#include "vm/SelfHosting.h"

#include "vm/EnvironmentObject-inl.h"
#include "vm/GeckoProfiler-inl.h"
#include "vm/JSObject-inl.h"
#include "vm/ObjectOperations-inl.h"
#include "vm/PlainObject-inl.h"

using namespace js;

using mozilla::AssertedCast;
using mozilla::CheckedInt;
using mozilla::IsAsciiDigit;

using JS::CompileOptions;
using JS::RegExpFlag;
using JS::RegExpFlags;

// Allocate an object for the |.groups| or |.indices.groups| property
// of a regexp match result.
//
// Returns nullptr on allocation failure. The returned object has no values
// stored in it yet; the caller fills them in.
static PlainObject* CreateGroupsObject(JSContext* cx,
                                       Handle<PlainObject*> groupsTemplate) {
  // A dictionary-mode template is not cloned. Start from an empty object with
  // a null prototype instead; the caller then defines each property
  // explicitly.
  if (groupsTemplate->inDictionaryMode()) {
    return NewPlainObjectWithProto(cx, nullptr);
  }

  // The groups template object is stored in RegExpShared, which is shared
  // across compartments and realms. So watch out for the case when the template
  // object's realm is different from the current realm.
  if (cx->realm() != groupsTemplate->realm()) {
    return PlainObject::createWithTemplateFromDifferentRealm(cx,
                                                             groupsTemplate);
  }

  return PlainObject::createWithTemplate(cx, groupsTemplate);
}

// Fetch the match value for the |i|-th distinct named capture group.
//
// On return, |val| holds the element read from |arr| and |valueIndex| holds
// the capture index that element was read from.
//
// If every name maps to exactly one capture (numNamedCaptures() equals
// numDistinctNamedCaptures()), the index is looked up directly. Otherwise one
// name can map to several capture indices: the first one whose element in
// |arr| is not undefined wins. If all of them are undefined, |valueIndex|
// stays at the first index of the slice and |val| is undefined.
static inline void getValueAndIndex(HandleRegExpShared re, uint32_t i,
                                    Handle<ArrayObject*> arr,
                                    MutableHandleValue val,
                                    uint32_t& valueIndex) {
  if (re->numNamedCaptures() == re->numDistinctNamedCaptures()) {
    valueIndex = re->getNamedCaptureIndex(i);
    val.set(arr->getDenseElement(valueIndex));
  } else {
    mozilla::Span<uint32_t> indicesSlice = re->getNamedCaptureIndices(i);
    MOZ_ASSERT(!indicesSlice.IsEmpty());
    // Default to the first candidate in case no candidate participated.
    valueIndex = indicesSlice[0];
    for (uint32_t index : indicesSlice) {
      val.set(arr->getDenseElement(index));
      if (!val.isUndefined()) {
        valueIndex = index;
        break;
      }
    }
  }
}

/*
 * Implements RegExpBuiltinExec: Steps 18-35
 * https://tc39.es/ecma262/#sec-regexpbuiltinexec
 *
 * Builds the match result array for a successful match described by
 * |matches| against |input| and stores it in |rval|. Returns false if any
 * allocation or property definition fails.
 */
bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
                                 HandleString input, const MatchPairs& matches,
                                 MutableHandleValue rval) {
  MOZ_ASSERT(re);
  MOZ_ASSERT(input);

  /*
   * Create the (slow) result array for a match.
   *
   * Array contents:
   *  0:              matched string
   *  1..pairCount-1: paren matches
   *  input:          input string
   *  index:          start index for the match
   *  groups:         named capture groups for the match
   *  indices:        capture indices for the match, if required
   */

  bool hasIndices = re->hasIndices();

  // Get the shape for the output object.
  RegExpRealm::ResultShapeKind kind =
      hasIndices ? RegExpRealm::ResultShapeKind::WithIndices
                 : RegExpRealm::ResultShapeKind::Normal;
  Rooted<SharedShape*> shape(
      cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(cx, kind));
  if (!shape) {
    return false;
  }

  // Steps 18-19
  size_t numPairs = matches.length();
  MOZ_ASSERT(numPairs > 0);

  // Steps 20-21: Allocate the match result object.
  Rooted<ArrayObject*> arr(
      cx, NewDenseFullyAllocatedArrayWithShape(cx, numPairs, shape));
  if (!arr) {
    return false;
  }

  // Steps 28-29 and 33 a-d: Initialize the elements of the match result.
  // Store a Value for each match pair.
  //
  // The initialized length is bumped one element at a time, immediately
  // before each element is initialized.
  for (size_t i = 0; i < numPairs; i++) {
    const MatchPair& pair = matches[i];

    if (pair.isUndefined()) {
      MOZ_ASSERT(i != 0);  // Since we had a match, first pair must be present.
      arr->setDenseInitializedLength(i + 1);
      arr->initDenseElement(i, UndefinedValue());
    } else {
      JSLinearString* str =
          NewDependentString(cx, input, pair.start, pair.length());
      if (!str) {
        return false;
      }
      arr->setDenseInitializedLength(i + 1);
      arr->initDenseElement(i, StringValue(str));
    }
  }

  // Step 34a (reordered): Allocate and initialize the indices object if needed.
  // This is an inlined implementation of MakeIndicesArray:
  // https://tc39.es/ecma262/#sec-makeindicesarray
  Rooted<ArrayObject*> indices(cx);
  Rooted<PlainObject*> indicesGroups(cx);
  if (hasIndices) {
    // MakeIndicesArray: step 8
    Rooted<SharedShape*> indicesShape(
        cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(
                cx, RegExpRealm::ResultShapeKind::Indices));
    if (!indicesShape) {
      return false;
    }
    indices = NewDenseFullyAllocatedArrayWithShape(cx, numPairs, indicesShape);
    if (!indices) {
      return false;
    }

    // MakeIndicesArray: steps 10-12
    if (re->numNamedCaptures() > 0) {
      Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
      indicesGroups = CreateGroupsObject(cx, groupsTemplate);
      if (!indicesGroups) {
        return false;
      }
      indices->initSlot(RegExpRealm::IndicesGroupsSlot,
                        ObjectValue(*indicesGroups));
    }

    // MakeIndicesArray: step 13 a-d. (Step 13.e is implemented below.)
    for (size_t i = 0; i < numPairs; i++) {
      const MatchPair& pair = matches[i];

      if (pair.isUndefined()) {
        // Since we had a match, first pair must be present.
        MOZ_ASSERT(i != 0);
        indices->setDenseInitializedLength(i + 1);
        indices->initDenseElement(i, UndefinedValue());
      } else {
        // Each participating capture gets a two-element [start, limit] array.
        ArrayObject* indexPair = NewDenseFullyAllocatedArray(cx, 2);
        if (!indexPair) {
          return false;
        }
        indexPair->setDenseInitializedLength(2);
        indexPair->initDenseElement(0, Int32Value(pair.start));
        indexPair->initDenseElement(1, Int32Value(pair.limit));

        indices->setDenseInitializedLength(i + 1);
        indices->initDenseElement(i, ObjectValue(*indexPair));
      }
    }
  }

  // Steps 30-31 (reordered): Allocate the groups object (if needed).
  Rooted<PlainObject*> groups(cx);
  bool groupsInDictionaryMode = false;
  if (re->numNamedCaptures() > 0) {
    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    groupsInDictionaryMode = groupsTemplate->inDictionaryMode();
    groups = CreateGroupsObject(cx, groupsTemplate);
    if (!groups) {
      return false;
    }
  }

  // Step 33 e-f: Initialize the properties of |groups| and |indices.groups|.
  // The groups template object stores the names of the named captures
  // in the order in which they are defined. The named capture
  // indices vector stores the corresponding capture indices. In
  // dictionary mode, we have to define the properties explicitly. If
  // we are not in dictionary mode, we simply fill in the slots with
  // the correct values.
  if (groupsInDictionaryMode) {
    RootedIdVector keys(cx);
    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
      return false;
    }
    MOZ_ASSERT(keys.length() == re->numDistinctNamedCaptures());
    RootedId key(cx);
    RootedValue val(cx);
    uint32_t valueIndex;
    for (uint32_t i = 0; i < keys.length(); i++) {
      key = keys[i];
      getValueAndIndex(re, i, arr, &val, valueIndex);
      if (!NativeDefineDataProperty(cx, groups, key, val, JSPROP_ENUMERATE)) {
        return false;
      }

      // MakeIndicesArray: Step 13.e (reordered)
      if (hasIndices) {
        val = indices->getDenseElement(valueIndex);
        if (!NativeDefineDataProperty(cx, indicesGroups, key, val,
                                      JSPROP_ENUMERATE)) {
          return false;
        }
      }
    }
  } else {
    RootedValue val(cx);
    uint32_t valueIndex;

    // When there are no named captures this loop runs zero times, so the
    // null |groups| is never dereferenced.
    for (uint32_t i = 0; i < re->numDistinctNamedCaptures(); i++) {
      getValueAndIndex(re, i, arr, &val, valueIndex);
      groups->initSlot(i, val);

      // MakeIndicesArray: Step 13.e (reordered)
      if (hasIndices) {
        indicesGroups->initSlot(i, indices->getDenseElement(valueIndex));
      }
    }
  }

  // Step 22 (reordered).
  // Set the |index| property.
  arr->initSlot(RegExpRealm::MatchResultObjectIndexSlot,
                Int32Value(matches[0].start));

  // Step 23 (reordered).
  // Set the |input| property.
  arr->initSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));

  // Step 32 (reordered)
  // Set the |groups| property.
  if (groups) {
    arr->initSlot(RegExpRealm::MatchResultObjectGroupsSlot,
                  ObjectValue(*groups));
  }

  // Step 34b
  // Set the |indices| property.
  if (re->hasIndices()) {
    arr->initSlot(RegExpRealm::MatchResultObjectIndicesSlot,
                  ObjectValue(*indices));
  }

#ifdef DEBUG
  // Check that the |index| and |input| properties read back through the
  // normal property lookup path agree with the slots written above.
  RootedValue test(cx);
  RootedId id(cx, NameToId(cx->names().index));
  if (!NativeGetProperty(cx, arr, id, &test)) {
    return false;
  }
  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
  id = NameToId(cx->names().input);
  if (!NativeGetProperty(cx, arr, id, &test)) {
    return false;
  }
  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
#endif

  // Step 35.
  rval.setObject(*arr);
  return true;
}

// Record the result of a successful search: the limit (end) of the overall
// match is stored in |cx->regExpSearcherLastLimit| and the start of the match
// is returned.
//
// On entry |cx->regExpSearcherLastLimit| must still hold
// RegExpSearcherLastLimitSentinel.
static int32_t CreateRegExpSearchResult(JSContext* cx,
                                        const MatchPairs& matches) {
  MOZ_ASSERT(matches[0].start >= 0);
  MOZ_ASSERT(matches[0].limit >= 0);

  MOZ_ASSERT(cx->regExpSearcherLastLimit == RegExpSearcherLastLimitSentinel);

#ifdef DEBUG
  // The sentinel is larger than any string length, so a real limit can never
  // be mistaken for it.
  static_assert(JSString::MAX_LENGTH < RegExpSearcherLastLimitSentinel);
  MOZ_ASSERT(uint32_t(matches[0].limit) < RegExpSearcherLastLimitSentinel);
#endif

  cx->regExpSearcherLastLimit = matches[0].limit;
  return matches[0].start;
}

/*
 * https://github.com/tc39/proposal-regexp-legacy-features/blob/master/README.md#regexpbuiltinexec--r-s-
 *
 * Decide whether a successful match on |regexp| should update the legacy
 * RegExp statics. When the experimental_legacy_regexp pref is off this is
 * always true. Otherwise it is true only when |regexp| belongs to the current
 * realm and has its legacy features enabled.
 */

static bool ShouldUpdateRegExpStatics(JSContext* cx,
                                      Handle<RegExpObject*> regexp) {
  if (!JS::Prefs::experimental_legacy_regexp()) {
    return true;
  }
  // Step 5. Let thisRealm be the current Realm Record.
  JS::Realm* thisRealm = cx->realm();
  // Step 6. Let rRealm be the value of R's [[Realm]] internal slot.
  JS::Realm* rRealm = regexp->realm();

  // Step 7. If SameValue(thisRealm, rRealm) is true, then
  if (thisRealm == rRealm) {
    return regexp->legacyFeaturesEnabled();
  }
  return false;
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
 *
 * Runs the matcher and, on success, either updates |res| from the match pairs
 * or invalidates it (see ShouldUpdateRegExpStatics). |res| may be null, in
 * which case the statics are left alone.
 */
static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
                                         MutableHandleRegExpShared re,
                                         Handle<JSLinearString*> input,
                                         size_t searchIndex,
                                         VectorMatchPairs* matches,
                                         Handle<RegExpObject*> regexp) {
  RegExpRunStatus status =
      RegExpShared::execute(cx, re, input, searchIndex, matches);

  /* Out of spec: Update RegExpStatics. */
  if (status == RegExpRunStatus::Success && res) {
    if (ShouldUpdateRegExpStatics(cx, regexp)) {
      if (!res->updateFromMatchPairs(cx, input, *matches)) {
        return RegExpRunStatus::Error;
      }
    } else {
      res->invalidate();
    }
  }
  return status;
}

/*
 * Legacy ExecuteRegExp behavior is baked into the JSAPI.
 *
 * |*lastIndex| is read as the index to start searching from and, when a match
 * is found, overwritten with the limit of that match.
 *
 * |rval| is set to null when nothing matched, to |true| when |test| is set
 * and there was a match, and otherwise to the match result array.
 */
bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
                             Handle<RegExpObject*> reobj,
                             Handle<JSLinearString*> input, size_t* lastIndex,
                             bool test, MutableHandleValue rval) {
  cx->check(reobj, input);

  RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
  if (!shared) {
    return false;
  }

  VectorMatchPairs matches;

  RegExpRunStatus status =
      ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, reobj);
  if (status == RegExpRunStatus::Error) {
    return false;
  }

  if (status == RegExpRunStatus::Success_NotFound) {
    /* ExecuteRegExp() previously returned an array or null. */
    rval.setNull();
    return true;
  }

  *lastIndex = matches[0].limit;

  if (test) {
    /* Forbid an array, as an optimization. */
    rval.setBoolean(true);
    return true;
  }

  return CreateRegExpMatchResult(cx, shared, input, matches, rval);
}

// Validate |pattern| under |flags| by calling irregexp::CheckPatternSyntax
// with a dummy token stream. Returns that call's result.
static bool CheckPatternSyntaxSlow(JSContext* cx, Handle<JSAtom*> pattern,
                                   RegExpFlags flags) {
  LifoAllocScope allocScope(&cx->tempLifoAlloc());
  AutoReportFrontendContext fc(cx);
  CompileOptions options(cx);
  frontend::DummyTokenStream dummyTokenStream(&fc, options);
  return irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(),
                                      dummyTokenStream, pattern, flags);
}

// Validate |pattern| under |flags| and return the zone's RegExpShared for that
// pair, or nullptr on failure.
static RegExpShared* CheckPatternSyntax(JSContext* cx, Handle<JSAtom*> pattern,
                                        RegExpFlags flags) {
  // If we already have a RegExpShared for this pattern/flags, we can
  // avoid the much slower CheckPatternSyntaxSlow call.

  RootedRegExpShared shared(cx, cx->zone()->regExps().maybeGet(pattern, flags));
  if (shared) {
#ifdef DEBUG
    // Assert the pattern is valid.
    if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
      MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
      return nullptr;
    }
#endif
    return shared;
  }

  if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
    return nullptr;
  }

  // Allocate and return a new RegExpShared so we will hit the fast path
  // next time.
  return cx->zone()->regExps().get(cx, pattern, flags);
}

/*
 * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
 *
 * Steps 14-15 set |obj|'s "lastIndex" property to zero.  Some of
 * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
 * in these cases zeroing "lastIndex" is infallible.  But others have a RegExp
 * whose "lastIndex" property might have been made non-writable: here, zeroing
 * "lastIndex" can fail.  We efficiently solve this problem by completely
 * removing "lastIndex" zeroing from the provided function.
 *
 * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
 *
 * Because this function only ever returns a user-provided |obj| in the spec,
 * we omit it and just return the usual success/failure.
 */
static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
                                              Handle<RegExpObject*> obj,
                                              HandleValue patternValue,
                                              HandleValue flagsValue) {
  Rooted<JSAtom*> pattern(cx);
  if (patternValue.isUndefined()) {
    /* Step 1. */
    pattern = cx->names().empty_;
  } else {
    /* Step 2. */
    pattern = ToAtom<CanGC>(cx, patternValue);
    if (!pattern) {
      return false;
    }
  }

  /* Step 3. */
  RegExpFlags flags = RegExpFlag::NoFlags;
  if (!flagsValue.isUndefined()) {
    /* Step 4. */
    RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
    if (!flagStr) {
      return false;
    }

    /* Step 5. */
    if (!ParseRegExpFlags(cx, flagStr, &flags)) {
      return false;
    }
  }

  /* Steps 7-8. */
  RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
  if (!shared) {
    return false;
  }

  /* Steps 9-12. */
  obj->initIgnoringLastIndex(pattern, flags);

  obj->setShared(shared);

  return true;
}

/*
 * ES 2016 draft Mar 25, 2016 21.2.3.2.3.
 *
 * Allocates a new RegExp object, initializes it from |patternValue| and
 * |flagsValue|, zeroes its "lastIndex" and stores it in |rval|.
 */
bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
                      HandleValue flagsValue, MutableHandleValue rval,
                      HandleObject newTarget) {
  /* Step 1. */
  Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, newTarget));
  if (!regexp) {
    return false;
  }

  /* Step 2. */
  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue,
                                         flagsValue)) {
    return false;
  }
  // RegExpInitializeIgnoringLastIndex leaves "lastIndex" to its caller.
  regexp->zeroLastIndex(cx);

  rval.setObject(*regexp);
  return true;
}

// Returns true if |v| is an object of class RegExpObject. No unwrapping is
// performed here.
MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
  return v.isObject() && v.toObject().is<RegExpObject>();
}

/*
 * ES6 draft rc3 7.2.8.
 *
 * Sets |*result| to whether |value| counts as a regular expression. A defined
 * @@match property decides by its truthiness; otherwise the decision falls
 * back to the object's class. Returns false only if a lookup fails.
 */
bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
  /* Step 1. */
  if (!value.isObject()) {
    *result = false;
    return true;
  }
  RootedObject obj(cx, &value.toObject());

  /* Steps 2-3. */
  RootedValue isRegExp(cx);
  RootedId matchId(cx, PropertyKey::Symbol(cx->wellKnownSymbols().match));
  if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) {
    return false;
  }

  /* Step 4. */
  if (!isRegExp.isUndefined()) {
    *result = ToBoolean(isRegExp);
    return true;
  }

  /* Steps 5-6. */
  ESClass cls;
  if (!GetClassOfValue(cx, value, &cls)) {
    return false;
  }

  *result = cls == ESClass::RegExp;
  return true;
}

// The "lastIndex" property is non-configurable, but it can be made
// non-writable. If CalledFromJit is true, we have emitted guards to ensure it's
// writable.
template <bool CalledFromJit = false>
static bool SetLastIndex(JSContext* cx, Handle<RegExpObject*> regexp,
                         int32_t lastIndex) {
  MOZ_ASSERT(lastIndex >= 0);

  // Fast path: when "lastIndex" is known to be writable, store it directly.
  // The conditions are ordered so that the property lookup only happens when
  // the cheaper checks have not already answered the question.
  if (CalledFromJit || MOZ_LIKELY(RegExpObject::isInitialShape(regexp)) ||
      regexp->lookupPure(cx->names().lastIndex)->writable()) {
    regexp->setLastIndex(cx, lastIndex);
    return true;
  }

  // Otherwise go through the generic SetProperty path and propagate its
  // result.
  Rooted<Value> val(cx, Int32Value(lastIndex));
  return SetProperty(cx, regexp, cx->names().lastIndex, val);
}

/*
 * RegExp.prototype.compile ( pattern, flags )
 * https://github.com/tc39/proposal-regexp-legacy-features?tab=readme-ov-file#regexpprototypecompile--pattern-flags-
 * ES6 B.2.5.1.
 *
 * Re-initializes the |this| RegExp object in place, either from another
 * RegExp's source and flags or from pattern/flags values, then sets its
 * "lastIndex" to zero. |args.thisv()| must already be a RegExpObject.
 */
MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
                                           const CallArgs& args) {
  MOZ_ASSERT(IsRegExpObject(args.thisv()));

  Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());

  // Step 7. If Type(pattern) is Object and pattern has a [[RegExpMatcher]]
  // internal slot, then
  RootedValue patternValue(cx, args.get(0));
  ESClass cls;
  if (!GetClassOfValue(cx, patternValue, &cls)) {
    return false;
  }
  if (cls == ESClass::RegExp) {
    // Step 7.i. If flags is not undefined, throw a TypeError exception.
    if (args.hasDefined(1)) {
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_NEWREGEXP_FLAGGED);
      return false;
    }

    // Beware!  |patternObj| might be a proxy into another compartment, so
    // don't assume |patternObj.is<RegExpObject>()|.  For the same reason,
    // don't reuse the RegExpShared below.
    RootedObject patternObj(cx, &patternValue.toObject());

    Rooted<JSAtom*> sourceAtom(cx);
    RegExpFlags flags = RegExpFlag::NoFlags;
    {
      // Step 7.ii. Let P be the value of pattern’s [[OriginalSource]] internal
      // slot.
      // |shared| is confined to this scope: only its source and flags are
      // copied out of it.
      RegExpShared* shared = RegExpToShared(cx, patternObj);
      if (!shared) {
        return false;
      }

      sourceAtom = shared->getSource();
      flags = shared->getFlags();
    }

    // Step 9, minus lastIndex zeroing.
    regexp->initIgnoringLastIndex(sourceAtom, flags);
  } else {
    // Step 8.
    RootedValue P(cx, patternValue);
    RootedValue F(cx, args.get(1));

    // Step 9, minus lastIndex zeroing.
    if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
      return false;
    }
  }

  // The final niggling bit of step 8.
  //
  // |regexp| is user-exposed, so its "lastIndex" property might be
  // non-writable.
  if (!SetLastIndex(cx, regexp, 0)) {
    return false;
  }

  args.rval().setObject(*regexp);
  return true;
}

// Native entry point for RegExp.prototype.compile.
//
// When the experimental_legacy_regexp pref is on and |this| is (or wraps) a
// RegExpObject, the cross-realm and legacy-features checks run first, on the
// unwrapped object. The call is then dispatched through CallNonGenericMethod.
static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (JS::Prefs::experimental_legacy_regexp() && args.thisv().isObject()) {
    RootedObject thisObj(cx, &args.thisv().toObject());

    JSObject* unwrapped = js::CheckedUnwrapStatic(thisObj);

    // If unwrapping fails or |this| is not a RegExp, no check is done here.
    if (unwrapped && unwrapped->is<RegExpObject>()) {
      // Step 3. Let thisRealm be the current Realm Record.
      JS::Realm* thisRealm = cx->realm();

      // Step 4. Let oRealm be the value of O’s [[Realm]] internal slot.
      RegExpObject* regexp = &unwrapped->as<RegExpObject>();

      JS::Realm* oRealm = regexp->realm();

      // Step 5. If SameValue(thisRealm, oRealm) is false, throw a TypeError
      // exception.
      if (thisRealm != oRealm) {
        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                  JSMSG_REGEXP_CROSS_REALM);
        return false;
      }

      // Step 6. If the value of R’s [[LegacyFeaturesEnabled]] internal slot is
      // false, throw a TypeError exception.
      if (!regexp->legacyFeaturesEnabled()) {
        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                  JSMSG_REGEXP_LEGACY_FEATURES_DISABLED);
        return false;
      }
    }
  }

  /* Steps 1-2. */
  return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
 *
 * Native implementation of the RegExp constructor, for both |RegExp(...)| and
 * |new RegExp(...)|.
 */
bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
  AutoJSConstructorProfilerEntry pseudoFrame(cx, "RegExp");
  CallArgs args = CallArgsFromVp(argc, vp);

  // Stays null for a plain call; set to new.target when constructing.
  RootedObject newTarget(cx);

  // Step 1.
  bool patternIsRegExp;
  if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) {
    return false;
  }

  if (!args.isConstructing()) {
    // Step 3.b.
    if (patternIsRegExp && !args.hasDefined(1)) {
      RootedObject patternObj(cx, &args[0].toObject());

      // Step 3.b.i.
      RootedValue patternConstructor(cx);
      if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
                       &patternConstructor)) {
        return false;
      }

      // Step 3.b.ii.
      // The pattern is returned unchanged when its |constructor| is this very
      // function.
      if (patternConstructor.isObject() &&
          patternConstructor.toObject() == args.callee()) {
        args.rval().set(args[0]);
        return true;
      }
    }
  } else {
    newTarget = &args.newTarget().toObject();
  }

  RootedValue patternValue(cx, args.get(0));

  // Step 4.
  ESClass cls;
  if (!GetClassOfValue(cx, patternValue, &cls)) {
    return false;
  }
  if (cls == ESClass::RegExp) {
    // Beware!  |patternObj| might be a proxy into another compartment, so
    // don't assume |patternObj.is<RegExpObject>()|.
    RootedObject patternObj(cx, &patternValue.toObject());

    Rooted<JSAtom*> sourceAtom(cx);
    RegExpFlags flags;
    RootedRegExpShared shared(cx);
    {
      // Step 4.a.
      shared = RegExpToShared(cx, patternObj);
      if (!shared) {
        return false;
      }
      sourceAtom = shared->getSource();

      // Step 4.b.
      // Get original flags in all cases, to compare with passed flags.
      flags = shared->getFlags();

      // If the RegExpShared is in another Zone, don't reuse it.
      if (cx->zone() != shared->zone()) {
        shared = nullptr;
      }
    }

    // Step 7.
    RootedObject proto(cx);
    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
      return false;
    }

    Rooted<RegExpObject*> regexp(
        cx, RegExpAlloc(cx, GenericObject, proto, newTarget));
    if (!regexp) {
      return false;
    }

    // Step 8.
    if (args.hasDefined(1)) {
      // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
      RegExpFlags flagsArg = RegExpFlag::NoFlags;
      RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
      if (!flagStr) {
        return false;
      }
      if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) {
        return false;
      }

      // Don't reuse the RegExpShared if we have different flags.
      if (flags != flagsArg) {
        shared = nullptr;
      }

      if (!flags.unicode() && flagsArg.unicode()) {
        // Have to check syntax again when adding 'u' flag.

        // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
        // 21.2.3.2.2 step 7.
        shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
        if (!shared) {
          return false;
        }
      }
      flags = flagsArg;
    }

    regexp->initAndZeroLastIndex(sourceAtom, flags, cx);

    // |shared| is null here if it could not be reused (different zone or
    // different flags); in that case none is attached.
    if (shared) {
      regexp->setShared(shared);
    }

    args.rval().setObject(*regexp);
    return true;
  }

  RootedValue P(cx);
  RootedValue F(cx);

  // Step 5.
  if (patternIsRegExp) {
    RootedObject patternObj(cx, &patternValue.toObject());

    // Step 5.a.
    if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) {
      return false;
    }

    // Step 5.b.
    F = args.get(1);
    if (F.isUndefined()) {
      if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) {
        return false;
      }
    }
  } else {
    // Steps 6.a-b.
    P = patternValue;
    F = args.get(1);
  }

  // Step 7.
  RootedObject proto(cx);
  if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
    return false;
  }

  Rooted<RegExpObject*> regexp(
      cx, RegExpAlloc(cx, GenericObject, proto, newTarget));
  if (!regexp) {
    return false;
  }

  // Step 8.
  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
    return false;
  }
  regexp->zeroLastIndex(cx);

  args.rval().setObject(*regexp);
  return true;
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
 * steps 4, 7-8.
 *
 * Takes exactly three arguments: the source string, the raw flag bits as an
 * int32, and a boolean requesting legacy features. Must not be called as a
 * constructor.
 */
bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 3);
  MOZ_ASSERT(!args.isConstructing());

  // Step 4.a.
  Rooted<JSAtom*> sourceAtom(cx, AtomizeString(cx, args[0].toString()));
  if (!sourceAtom) {
    return false;
  }

  // Step 4.c.
  // Bits outside RegExpFlag::AllFlags are masked off before the cast.
  uint32_t rawFlags = args[1].toInt32();
  JS::RegExpFlags flags =
      AssertedCast<uint8_t>(rawFlags & RegExpFlag::AllFlags);

  // Self-hosted code can't check prefs efficiently. In some cases it will
  // call this with the flag set even when the pref is disabled, in which
  // case we should ignore it.
  // TODO(bug 2009034): Clean this up when we ship the proposal.
  bool legacy = args[2].toBoolean() && JS::Prefs::experimental_legacy_regexp();

  // Step 7.
  RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
  if (!regexp) {
    return false;
  }

  // Step 8.
  regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
  regexp->setLegacyFeaturesEnabled(legacy);
  args.rval().setObject(*regexp);
  return true;
}

// This is a specialized implementation of "UnwrapAndTypeCheckThis" for RegExp
// getters that need to return a special value for same-realm
// %RegExp.prototype%.
898 template <typename Fn> 899 static bool RegExpGetter(JSContext* cx, CallArgs& args, const char* methodName, 900 Fn&& fn, 901 HandleValue fallbackValue = UndefinedHandleValue) { 902 JSObject* obj = nullptr; 903 if (args.thisv().isObject()) { 904 obj = &args.thisv().toObject(); 905 if (IsWrapper(obj)) { 906 obj = CheckedUnwrapStatic(obj); 907 if (!obj) { 908 ReportAccessDenied(cx); 909 return false; 910 } 911 } 912 } 913 914 if (obj) { 915 // Step 4ff 916 if (obj->is<RegExpObject>()) { 917 return fn(&obj->as<RegExpObject>()); 918 } 919 920 // Step 3.a. "If SameValue(R, %RegExp.prototype%) is true, return 921 // undefined." 922 // Or `return "(?:)"` for get RegExp.prototype.source. 923 if (obj == cx->global()->maybeGetRegExpPrototype()) { 924 args.rval().set(fallbackValue); 925 return true; 926 } 927 928 // fall-through 929 } 930 931 // Step 2. and Step 3.b. 932 JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr, 933 JSMSG_INCOMPATIBLE_REGEXP_GETTER, methodName, 934 InformalValueTypeName(args.thisv())); 935 return false; 936 } 937 938 bool js::regexp_hasIndices(JSContext* cx, unsigned argc, JS::Value* vp) { 939 CallArgs args = CallArgsFromVp(argc, vp); 940 return RegExpGetter(cx, args, "hasIndices", [args](RegExpObject* unwrapped) { 941 args.rval().setBoolean(unwrapped->hasIndices()); 942 return true; 943 }); 944 } 945 946 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 947 // 21.2.5.5 get RegExp.prototype.global 948 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) { 949 CallArgs args = CallArgsFromVp(argc, vp); 950 return RegExpGetter(cx, args, "global", [args](RegExpObject* unwrapped) { 951 args.rval().setBoolean(unwrapped->global()); 952 return true; 953 }); 954 } 955 956 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 957 // 21.2.5.6 get RegExp.prototype.ignoreCase 958 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) { 959 CallArgs args = CallArgsFromVp(argc, vp); 960 return 
RegExpGetter(cx, args, "ignoreCase", [args](RegExpObject* unwrapped) { 961 args.rval().setBoolean(unwrapped->ignoreCase()); 962 return true; 963 }); 964 } 965 966 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 967 // 21.2.5.9 get RegExp.prototype.multiline 968 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) { 969 CallArgs args = CallArgsFromVp(argc, vp); 970 return RegExpGetter(cx, args, "multiline", [args](RegExpObject* unwrapped) { 971 args.rval().setBoolean(unwrapped->multiline()); 972 return true; 973 }); 974 } 975 976 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 977 // 21.2.5.12 get RegExp.prototype.source 978 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) { 979 CallArgs args = CallArgsFromVp(argc, vp); 980 // Step 3.a. Return "(?:)" for %RegExp.prototype%. 981 RootedValue fallback(cx, StringValue(cx->names().emptyRegExp_)); 982 return RegExpGetter( 983 cx, args, "source", 984 [cx, args](RegExpObject* unwrapped) { 985 Rooted<JSAtom*> src(cx, unwrapped->getSource()); 986 MOZ_ASSERT(src); 987 // Mark potentially cross-zone JSAtom. 988 if (cx->zone() != unwrapped->zone()) { 989 cx->markAtom(src); 990 } 991 992 // Step 7. 
993 JSString* escaped = EscapeRegExpPattern(cx, src); 994 if (!escaped) { 995 return false; 996 } 997 998 args.rval().setString(escaped); 999 return true; 1000 }, 1001 fallback); 1002 } 1003 1004 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 1005 // 21.2.5.3 get RegExp.prototype.dotAll 1006 bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) { 1007 CallArgs args = CallArgsFromVp(argc, vp); 1008 return RegExpGetter(cx, args, "dotAll", [args](RegExpObject* unwrapped) { 1009 args.rval().setBoolean(unwrapped->dotAll()); 1010 return true; 1011 }); 1012 } 1013 1014 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 1015 // 21.2.5.14 get RegExp.prototype.sticky 1016 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) { 1017 CallArgs args = CallArgsFromVp(argc, vp); 1018 return RegExpGetter(cx, args, "sticky", [args](RegExpObject* unwrapped) { 1019 args.rval().setBoolean(unwrapped->sticky()); 1020 return true; 1021 }); 1022 } 1023 1024 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 1025 // 21.2.5.17 get RegExp.prototype.unicode 1026 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) { 1027 CallArgs args = CallArgsFromVp(argc, vp); 1028 return RegExpGetter(cx, args, "unicode", [args](RegExpObject* unwrapped) { 1029 args.rval().setBoolean(unwrapped->unicode()); 1030 return true; 1031 }); 1032 } 1033 1034 // https://arai-a.github.io/ecma262-compare/?pr=2418&id=sec-get-regexp.prototype.unicodesets 1035 // 21.2.6.19 get RegExp.prototype.unicodeSets 1036 bool js::regexp_unicodeSets(JSContext* cx, unsigned argc, JS::Value* vp) { 1037 CallArgs args = CallArgsFromVp(argc, vp); 1038 return RegExpGetter(cx, args, "unicodeSets", [args](RegExpObject* unwrapped) { 1039 args.rval().setBoolean(unwrapped->unicodeSets()); 1040 return true; 1041 }); 1042 } 1043 1044 const JSPropertySpec js::regexp_properties[] = { 1045 JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0), 1046 
// Accessor properties registered for RegExp.prototype. "flags" is bound to a
// self-hosted getter, "source" to a plain native getter (JS_PSG), and the
// individual flag getters are registered through JS_INLINABLE_PSG together
// with an InlinableNative identifier.
const JSPropertySpec js::regexp_properties[] = {
    JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0),
    JS_INLINABLE_PSG("hasIndices", regexp_hasIndices, 0, RegExpHasIndices),
    JS_INLINABLE_PSG("global", regexp_global, 0, RegExpGlobal),
    JS_INLINABLE_PSG("ignoreCase", regexp_ignoreCase, 0, RegExpIgnoreCase),
    JS_INLINABLE_PSG("multiline", regexp_multiline, 0, RegExpMultiline),
    JS_INLINABLE_PSG("dotAll", regexp_dotAll, 0, RegExpDotAll),
    JS_PSG("source", regexp_source, 0),
    JS_INLINABLE_PSG("sticky", regexp_sticky, 0, RegExpSticky),
    JS_INLINABLE_PSG("unicode", regexp_unicode, 0, RegExpUnicode),
    JS_INLINABLE_PSG("unicodeSets", regexp_unicodeSets, 0, RegExpUnicodeSets),
    JS_PS_END,
};

// Methods registered for RegExp.prototype. Only "compile" is a native
// function here; every other entry names a self-hosted implementation.
// "toSource" and "toString" share the same self-hosted function.
const JSFunctionSpec js::regexp_methods[] = {
    JS_SELF_HOSTED_FN("toSource", "$RegExpToString", 0, 0),
    JS_SELF_HOSTED_FN("toString", "$RegExpToString", 0, 0),
    JS_FN("compile", regexp_compile, 2, 0),
    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
    JS_FS_END,
};

// Sentinel stored in the Ascii escape map for characters that must be
// hex-escaped. EncodeForRegExpEscape tells it apart from a real escape
// character with mozilla::IsAscii(), so the value has to be non-ASCII.
static constexpr JS::Latin1Char SHOULD_HEX_ESCAPE = JSString::MAX_LATIN1_CHAR;
1081 */ 1082 static constexpr auto AsciiRegExpEscapeMap() { 1083 std::array<JS::Latin1Char, 128> result = {}; 1084 1085 // SyntaxCharacter or U+002F (SOLIDUS) 1086 result['^'] = '^'; 1087 result['$'] = '$'; 1088 result['\\'] = '\\'; 1089 result['.'] = '.'; 1090 result['*'] = '*'; 1091 result['+'] = '+'; 1092 result['?'] = '?'; 1093 result['('] = '('; 1094 result[')'] = ')'; 1095 result['['] = '['; 1096 result[']'] = ']'; 1097 result['{'] = '{'; 1098 result['}'] = '}'; 1099 result['|'] = '|'; 1100 result['/'] = '/'; 1101 1102 // ControlEscape Code Point Values 1103 result['\t'] = 't'; 1104 result['\n'] = 'n'; 1105 result['\v'] = 'v'; 1106 result['\f'] = 'f'; 1107 result['\r'] = 'r'; 1108 1109 // Other punctuators ",-=<>#&!%:;@~'`" or 0x0022 (QUOTATION MARK) 1110 result[','] = SHOULD_HEX_ESCAPE; 1111 result['-'] = SHOULD_HEX_ESCAPE; 1112 result['='] = SHOULD_HEX_ESCAPE; 1113 result['<'] = SHOULD_HEX_ESCAPE; 1114 result['>'] = SHOULD_HEX_ESCAPE; 1115 result['#'] = SHOULD_HEX_ESCAPE; 1116 result['&'] = SHOULD_HEX_ESCAPE; 1117 result['!'] = SHOULD_HEX_ESCAPE; 1118 result['%'] = SHOULD_HEX_ESCAPE; 1119 result[':'] = SHOULD_HEX_ESCAPE; 1120 result[';'] = SHOULD_HEX_ESCAPE; 1121 result['@'] = SHOULD_HEX_ESCAPE; 1122 result['~'] = SHOULD_HEX_ESCAPE; 1123 result['\''] = SHOULD_HEX_ESCAPE; 1124 result['`'] = SHOULD_HEX_ESCAPE; 1125 result['"'] = SHOULD_HEX_ESCAPE; 1126 1127 // WhiteSpace or LineTerminator 1128 result[' '] = SHOULD_HEX_ESCAPE; 1129 1130 return result; 1131 } 1132 1133 /** 1134 * EncodeForRegExpEscape ( c ) 1135 * 1136 * https://tc39.es/proposal-regex-escaping/#sec-encodeforregexpescape 1137 */ 1138 template <typename CharT> 1139 [[nodiscard]] static bool EncodeForRegExpEscape( 1140 mozilla::Span<const CharT> chars, JSStringBuilder& sb) { 1141 MOZ_ASSERT(sb.empty()); 1142 1143 const size_t length = chars.size(); 1144 if (length == 0) { 1145 return true; 1146 } 1147 1148 static constexpr auto asciiEscapeMap = AsciiRegExpEscapeMap(); 1149 1150 // Number of 
/**
 * EncodeForRegExpEscape ( c )
 *
 * https://tc39.es/proposal-regex-escaping/#sec-encodeforregexpescape
 *
 * Writes the escaped form of |chars| into |sb|, which must be empty on entry.
 *
 * The work is done in two passes that classify every character identically:
 * the first pass only computes the output length, the second pass appends to
 * |sb| using infallible appends into the space reserved from that length.
 *
 * Returns true on success. When no character needs escaping, |sb| is left
 * empty so the caller can reuse the input string. Returns false when
 * |sb.ensureTwoByteChars()| or |sb.reserve()| fails.
 */
template <typename CharT>
[[nodiscard]] static bool EncodeForRegExpEscape(
    mozilla::Span<const CharT> chars, JSStringBuilder& sb) {
  MOZ_ASSERT(sb.empty());

  const size_t length = chars.size();
  if (length == 0) {
    return true;
  }

  static constexpr auto asciiEscapeMap = AsciiRegExpEscapeMap();

  // Number of characters added when escaping. Each value is the length of the
  // escape sequence minus the one character it replaces.
  static constexpr size_t EscapeAddLength = 2 - 1;
  static constexpr size_t HexEscapeAddLength = 4 - 1;
  static constexpr size_t UnicodeEscapeAddLength = 6 - 1;

  // Initial scan to determine if escape sequences are needed and to compute
  // the output length.
  size_t outLength = length;

  // Leading Ascii alpha-numeric character is hex-escaped.
  size_t scanStart = 0;
  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
    outLength += HexEscapeAddLength;
    scanStart = 1;
  }

  for (size_t i = scanStart; i < length; i++) {
    CharT ch = chars[i];

    // Zero means "no escape"; see AsciiRegExpEscapeMap for the other values.
    JS::Latin1Char escape = 0;
    if (mozilla::IsAscii(ch)) {
      escape = asciiEscapeMap[ch];
    } else {
      // Surrogate pair. Both halves are kept unescaped, so skip the trail
      // surrogate as well.
      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
          unicode::IsTrailSurrogate(chars[i + 1])) {
        i += 1;
        continue;
      }

      // WhiteSpace or LineTerminator or unmatched surrogate.
      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
        escape = SHOULD_HEX_ESCAPE;
      }
    }
    if (!escape) {
      continue;
    }

    // An Ascii |escape| is a `\`-escape. Otherwise it is the
    // SHOULD_HEX_ESCAPE sentinel, and the character's own value selects
    // between the hex-escape and the Unicode-escape form.
    if (mozilla::IsAscii(escape)) {
      outLength += EscapeAddLength;
    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
      outLength += HexEscapeAddLength;
    } else {
      outLength += UnicodeEscapeAddLength;
    }
  }

  // Return if no escape sequences are needed.
  if (outLength == length) {
    return true;
  }
  MOZ_ASSERT(outLength > length);

  // Inflating is fallible, so we have to convert to two-byte upfront.
  if constexpr (std::is_same_v<CharT, char16_t>) {
    if (!sb.ensureTwoByteChars()) {
      return false;
    }
  }

  // Allocate memory for the output using the final length. Every append below
  // is infallible and relies on this reservation.
  if (!sb.reserve(outLength)) {
    return false;
  }

  // NB: Lower case hex digits.
  static constexpr char HexDigits[] = "0123456789abcdef";
  static_assert(std::char_traits<char>::length(HexDigits) == 16);

  // Append |ch| as an escaped character.
  auto appendEscape = [&](JS::Latin1Char ch) {
    MOZ_ASSERT(mozilla::IsAscii(ch));

    sb.infallibleAppend('\\');
    sb.infallibleAppend(ch);
  };

  // Append |ch| as a hex-escape sequence.
  auto appendHexEscape = [&](CharT ch) {
    MOZ_ASSERT(ch <= JSString::MAX_LATIN1_CHAR);

    sb.infallibleAppend('\\');
    sb.infallibleAppend('x');
    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
    sb.infallibleAppend(HexDigits[ch & 0xf]);
  };

  // Append |ch| as a Unicode-escape sequence.
  auto appendUnicodeEscape = [&](char16_t ch) {
    MOZ_ASSERT(ch > JSString::MAX_LATIN1_CHAR);

    sb.infallibleAppend('\\');
    sb.infallibleAppend('u');
    sb.infallibleAppend(HexDigits[(ch >> 12) & 0xf]);
    sb.infallibleAppend(HexDigits[(ch >> 8) & 0xf]);
    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
    sb.infallibleAppend(HexDigits[ch & 0xf]);
  };

  // Index after the last character which produced an escape sequence.
  size_t startUnescaped = 0;

  // Append unescaped characters from |startUnescaped| (inclusive) to |end|
  // (exclusive). Afterwards |startUnescaped| points past |end|, i.e. past the
  // character that is about to be escaped.
  auto appendUnescaped = [&](size_t end) {
    MOZ_ASSERT(startUnescaped <= end && end <= length);

    if (startUnescaped < end) {
      auto unescaped = chars.FromTo(startUnescaped, end);
      sb.infallibleAppend(unescaped.data(), unescaped.size());
    }
    startUnescaped = end + 1;
  };

  // Leading Ascii alpha-numeric character is hex-escaped.
  size_t start = 0;
  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
    appendHexEscape(chars[0]);

    start = 1;
    startUnescaped = 1;
  }

  // Second pass: same classification as the sizing loop above, but this time
  // the output is written.
  for (size_t i = start; i < length; i++) {
    CharT ch = chars[i];

    JS::Latin1Char escape = 0;
    if (mozilla::IsAscii(ch)) {
      escape = asciiEscapeMap[ch];
    } else {
      // Surrogate pair.
      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
          unicode::IsTrailSurrogate(chars[i + 1])) {
        i += 1;
        continue;
      }

      // WhiteSpace or LineTerminator or unmatched surrogate.
      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
        escape = SHOULD_HEX_ESCAPE;
      }
    }
    if (!escape) {
      continue;
    }

    // Flush the pending run of unescaped characters before the escape.
    appendUnescaped(i);

    if (mozilla::IsAscii(escape)) {
      appendEscape(escape);
    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
      appendHexEscape(ch);
    } else {
      appendUnicodeEscape(ch);
    }
  }

  // Flush the unescaped tail that follows the last escape sequence.
  if (startUnescaped) {
    appendUnescaped(length);
  }

  MOZ_ASSERT(sb.length() == outLength, "all characters were written");
  return true;
}

// Dispatches to the Latin-1 or the two-byte instantiation of the encoder
// above, depending on how |string| stores its characters.
[[nodiscard]] static bool EncodeForRegExpEscape(JSLinearString* string,
                                                JSStringBuilder& sb) {
  JS::AutoCheckCannotGC nogc;
  if (string->hasLatin1Chars()) {
    auto chars = mozilla::Span(string->latin1Range(nogc));
    return EncodeForRegExpEscape(chars, sb);
  }
  auto chars = mozilla::Span(string->twoByteRange(nogc));
  return EncodeForRegExpEscape(chars, sb);
}
1336 if (!args.get(0).isString()) { 1337 return ReportValueError(cx, JSMSG_UNEXPECTED_TYPE, JSDVG_SEARCH_STACK, 1338 args.get(0), nullptr, "not a string"); 1339 } 1340 1341 Rooted<JSLinearString*> string(cx, args[0].toString()->ensureLinear(cx)); 1342 if (!string) { 1343 return false; 1344 } 1345 1346 // Step 2-5. 1347 JSStringBuilder sb(cx); 1348 if (!EncodeForRegExpEscape(string, sb)) { 1349 return false; 1350 } 1351 1352 // Return the input string if no escape sequences were added. 1353 if (sb.empty()) { 1354 args.rval().setString(string); 1355 return true; 1356 } 1357 1358 auto* result = sb.finishString(); 1359 if (!result) { 1360 return false; 1361 } 1362 1363 args.rval().setString(result); 1364 return true; 1365 } 1366 1367 #define STATIC_PAREN_GETTER_CODE(parenNum) \ 1368 if (!res->createParen(cx, parenNum, args.rval())) return false; \ 1369 if (args.rval().isUndefined()) \ 1370 args.rval().setString(cx->runtime()->emptyString); \ 1371 return true 1372 1373 /* 1374 * RegExp static properties. 1375 * 1376 * RegExp class static properties and their Perl counterparts: 1377 * 1378 * RegExp.input $_ 1379 * RegExp.lastMatch $& 1380 * RegExp.lastParen $+ 1381 * RegExp.leftContext $` 1382 * RegExp.rightContext $' 1383 */ 1384 1385 static bool checkRegexpLegacyFeatures(JSContext* cx, const CallArgs& args, 1386 const char* name) { 1387 if (JS::Prefs::experimental_legacy_regexp()) { 1388 /* Step 1. Assert C is an object that has an internal slot named 1389 * internalSlotName.*/ 1390 JSObject* regexpCtor = 1391 GlobalObject::getOrCreateRegExpConstructor(cx, cx->global()); 1392 if (!regexpCtor) return false; 1393 1394 /* Step 2. 
If SameValue(C, thisValue) is false, throw TypeError */ 1395 bool same = false; 1396 if (!args.thisv().isObject() || 1397 !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) || 1398 !same) { 1399 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 1400 JSMSG_INCOMPATIBLE_RECEIVER, name, 1401 InformalValueTypeName(args.thisv())); 1402 return false; 1403 } 1404 1405 /* Step 4. If val is empty, throw a TypeError exception */ 1406 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); 1407 if (!res) return false; 1408 if (res->isInvalidated()) { 1409 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 1410 JSMSG_REGEXP_STATIC_EMPTY, name, 1411 InformalValueTypeName(args.thisv())); 1412 return false; 1413 } 1414 } 1415 return true; 1416 } 1417 1418 #define DEFINE_STATIC_GETTER(name, code) \ 1419 static bool name(JSContext* cx, unsigned argc, Value* vp) { \ 1420 CallArgs args = CallArgsFromVp(argc, vp); \ 1421 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \ 1422 if (!res) return false; \ 1423 if (!checkRegexpLegacyFeatures(cx, args, #name)) return false; \ 1424 code; \ 1425 } 1426 1427 DEFINE_STATIC_GETTER(static_input_getter, 1428 return res->createPendingInput(cx, args.rval())) 1429 DEFINE_STATIC_GETTER(static_lastMatch_getter, 1430 return res->createLastMatch(cx, args.rval())) 1431 DEFINE_STATIC_GETTER(static_lastParen_getter, 1432 return res->createLastParen(cx, args.rval())) 1433 DEFINE_STATIC_GETTER(static_leftContext_getter, 1434 return res->createLeftContext(cx, args.rval())) 1435 DEFINE_STATIC_GETTER(static_rightContext_getter, 1436 return res->createRightContext(cx, args.rval())) 1437 1438 DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1)) 1439 DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2)) 1440 DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3)) 1441 DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4)) 1442 
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))

// Setter counterpart of DEFINE_STATIC_GETTER: same prologue, but the
// generated function returns true after |code| has run.
// NOTE(review): no use of this macro is visible in this part of the file
// (static_input_setter below is written out by hand) — confirm whether it is
// still needed.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    if (!checkRegexpLegacyFeatures(cx, args, #name)) return false;         \
    code;                                                                  \
    return true;                                                           \
  }

// Native setter for RegExp.input / RegExp.$_.
//
// With the experimental legacy-RegExp pref enabled, |this| must be the
// current global's RegExp constructor, otherwise a TypeError is reported.
// The first argument is converted with ToString, stored as the pending input
// of the global's RegExpStatics, and also returned.
static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  if (JS::Prefs::experimental_legacy_regexp()) {
    // Step 1. Assert C is an object that has an internal slot named
    // internalSlotName.
    JSObject* regexpCtor =
        GlobalObject::getOrCreateRegExpConstructor(cx, cx->global());
    if (!regexpCtor) {
      return false;
    }

    // Step 2. If SameValue(C, thisValue) is false, throw a TypeError exception.
    bool same = false;
    if (!args.thisv().isObject() ||
        !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) ||
        !same) {
      // NOTE(review): checkRegexpLegacyFeatures passes two format arguments
      // (name, type name) with JSMSG_INCOMPATIBLE_RECEIVER, while this call
      // passes only the type name. Confirm against the argument count the
      // message declares.
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_INCOMPATIBLE_RECEIVER,
                                InformalValueTypeName(args.thisv()));
      return false;
    }
  }

  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
  if (!res) {
    return false;
  }

  // Step 3. Let strVal be ? ToString(val).
  RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
  if (!str) {
    return false;
  }

  // Step 4. Set the value of the internal slot of C named internalSlotName to
  // strVal.
  res->setPendingInput(str);
  args.rval().setString(str);
  return true;
}
// Static accessor properties of the RegExp constructor. The two variants
// register the same getters and setters and differ only in property
// attributes: under NIGHTLY_BUILD every legacy accessor is defined with
// attribute flags 0.
#ifdef NIGHTLY_BUILD
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter, 0),
    JS_PSG("lastMatch", static_lastMatch_getter, 0),
    JS_PSG("lastParen", static_lastParen_getter, 0),
    JS_PSG("leftContext", static_leftContext_getter, 0),
    JS_PSG("rightContext", static_rightContext_getter, 0),
    JS_PSG("$1", static_paren1_getter, 0),
    JS_PSG("$2", static_paren2_getter, 0),
    JS_PSG("$3", static_paren3_getter, 0),
    JS_PSG("$4", static_paren4_getter, 0),
    JS_PSG("$5", static_paren5_getter, 0),
    JS_PSG("$6", static_paren6_getter, 0),
    JS_PSG("$7", static_paren7_getter, 0),
    JS_PSG("$8", static_paren8_getter, 0),
    JS_PSG("$9", static_paren9_getter, 0),
    JS_PSGS("$_", static_input_getter, static_input_setter, 0),
    JS_PSG("$&", static_lastMatch_getter, 0),
    JS_PSG("$+", static_lastParen_getter, 0),
    JS_PSG("$`", static_leftContext_getter, 0),
    JS_PSG("$'", static_rightContext_getter, 0),
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END,
};
#else
// Outside NIGHTLY_BUILD the named accessors and $1..$9 are permanent and
// enumerable, while the Perl-style aliases ($_, $&, $+, $`, $') are permanent
// only.
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END,
};
#endif

// Static methods of the RegExp constructor: only RegExp.escape.
const JSFunctionSpec js::regexp_static_methods[] = {
    JS_FN("escape", regexp_escape, 1, 0),
    JS_FS_END,
};
*/ 1573 Handle<RegExpObject*> reobj = regexp.as<RegExpObject>(); 1574 1575 RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj)); 1576 if (!re) { 1577 return RegExpRunStatus::Error; 1578 } 1579 1580 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); 1581 if (!res) { 1582 return RegExpRunStatus::Error; 1583 } 1584 1585 Rooted<JSLinearString*> input(cx, string->ensureLinear(cx)); 1586 if (!input) { 1587 return RegExpRunStatus::Error; 1588 } 1589 1590 /* Handled by caller */ 1591 MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length()); 1592 1593 /* Steps 4-8 performed by the caller. */ 1594 1595 /* Steps 3, 10-14, except 12.a.i, 12.c.i.1. */ 1596 RegExpRunStatus status = 1597 ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, reobj); 1598 if (status == RegExpRunStatus::Error) { 1599 return RegExpRunStatus::Error; 1600 } 1601 1602 /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */ 1603 return status; 1604 } 1605 1606 /* 1607 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2 1608 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. 1609 */ 1610 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp, 1611 HandleString string, int32_t lastIndex, 1612 MutableHandleValue rval) { 1613 /* Execute regular expression and gather matches. */ 1614 VectorMatchPairs matches; 1615 1616 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */ 1617 RegExpRunStatus status = 1618 ExecuteRegExp(cx, regexp, string, lastIndex, &matches); 1619 if (status == RegExpRunStatus::Error) { 1620 return false; 1621 } 1622 1623 /* Steps 12.a, 12.c. 
*/ 1624 if (status == RegExpRunStatus::Success_NotFound) { 1625 rval.setNull(); 1626 return true; 1627 } 1628 1629 /* Steps 16-25 */ 1630 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared()); 1631 return CreateRegExpMatchResult(cx, shared, string, matches, rval); 1632 } 1633 1634 /* 1635 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2 1636 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. 1637 */ 1638 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) { 1639 CallArgs args = CallArgsFromVp(argc, vp); 1640 MOZ_ASSERT(args.length() == 3); 1641 MOZ_ASSERT(IsRegExpObject(args[0])); 1642 MOZ_ASSERT(args[1].isString()); 1643 MOZ_ASSERT(args[2].isNumber()); 1644 1645 RootedObject regexp(cx, &args[0].toObject()); 1646 RootedString string(cx, args[1].toString()); 1647 1648 int32_t lastIndex; 1649 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex)); 1650 1651 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */ 1652 return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval()); 1653 } 1654 1655 /* 1656 * Separate interface for use by the JITs. 1657 * This code cannot re-enter JIT code. 1658 */ 1659 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, 1660 HandleString input, int32_t lastIndex, 1661 MatchPairs* maybeMatches, MutableHandleValue output) { 1662 MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length()); 1663 1664 // RegExp execution was successful only if the pairs have actually been 1665 // filled in. Note that IC code always passes a nullptr maybeMatches. 
1666 if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) { 1667 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared()); 1668 return CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output); 1669 } 1670 return RegExpMatcherImpl(cx, regexp, input, lastIndex, output); 1671 } 1672 1673 /* 1674 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2 1675 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. 1676 * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub, 1677 * changes to this code need to get reflected in there too. 1678 */ 1679 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp, 1680 HandleString string, int32_t lastIndex, 1681 int32_t* result) { 1682 /* Execute regular expression and gather matches. */ 1683 VectorMatchPairs matches; 1684 1685 #ifdef DEBUG 1686 // Ensure we assert if RegExpSearcherLastLimit is called when there's no 1687 // match. 1688 cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel; 1689 #endif 1690 1691 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */ 1692 RegExpRunStatus status = 1693 ExecuteRegExp(cx, regexp, string, lastIndex, &matches); 1694 if (status == RegExpRunStatus::Error) { 1695 return false; 1696 } 1697 1698 /* Steps 12.a, 12.c. */ 1699 if (status == RegExpRunStatus::Success_NotFound) { 1700 *result = -1; 1701 return true; 1702 } 1703 1704 /* Steps 16-25 */ 1705 *result = CreateRegExpSearchResult(cx, matches); 1706 return true; 1707 } 1708 1709 /* 1710 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2 1711 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. 
1712 */ 1713 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) { 1714 CallArgs args = CallArgsFromVp(argc, vp); 1715 MOZ_ASSERT(args.length() == 3); 1716 MOZ_ASSERT(IsRegExpObject(args[0])); 1717 MOZ_ASSERT(args[1].isString()); 1718 MOZ_ASSERT(args[2].isNumber()); 1719 1720 RootedObject regexp(cx, &args[0].toObject()); 1721 RootedString string(cx, args[1].toString()); 1722 1723 int32_t lastIndex; 1724 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex)); 1725 1726 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */ 1727 int32_t result = 0; 1728 if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) { 1729 return false; 1730 } 1731 1732 args.rval().setInt32(result); 1733 return true; 1734 } 1735 1736 /* 1737 * Separate interface for use by the JITs. 1738 * This code cannot re-enter JIT code. 1739 */ 1740 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp, 1741 HandleString input, int32_t lastIndex, 1742 MatchPairs* maybeMatches, int32_t* result) { 1743 MOZ_ASSERT(lastIndex >= 0); 1744 1745 // RegExp execution was successful only if the pairs have actually been 1746 // filled in. Note that IC code always passes a nullptr maybeMatches. 1747 if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) { 1748 *result = CreateRegExpSearchResult(cx, *maybeMatches); 1749 return true; 1750 } 1751 return RegExpSearcherImpl(cx, regexp, input, lastIndex, result); 1752 } 1753 1754 bool js::RegExpSearcherLastLimit(JSContext* cx, unsigned argc, Value* vp) { 1755 CallArgs args = CallArgsFromVp(argc, vp); 1756 MOZ_ASSERT(args.length() == 1); 1757 MOZ_ASSERT(args[0].isString()); 1758 1759 // Assert the limit is not the sentinel value and is valid for this string. 
1760 MOZ_ASSERT(cx->regExpSearcherLastLimit != RegExpSearcherLastLimitSentinel); 1761 MOZ_ASSERT(cx->regExpSearcherLastLimit <= args[0].toString()->length()); 1762 1763 args.rval().setInt32(cx->regExpSearcherLastLimit); 1764 1765 #ifdef DEBUG 1766 // Ensure we assert if this function is called again without a new call to 1767 // RegExpSearcher. 1768 cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel; 1769 #endif 1770 return true; 1771 } 1772 1773 template <bool CalledFromJit> 1774 static bool RegExpBuiltinExecMatchRaw(JSContext* cx, 1775 Handle<RegExpObject*> regexp, 1776 HandleString input, int32_t lastIndex, 1777 MatchPairs* maybeMatches, 1778 MutableHandleValue output) { 1779 MOZ_ASSERT(lastIndex >= 0); 1780 MOZ_ASSERT(size_t(lastIndex) <= input->length()); 1781 MOZ_ASSERT_IF(!CalledFromJit, !maybeMatches); 1782 1783 // RegExp execution was successful only if the pairs have actually been 1784 // filled in. Note that IC code always passes a nullptr maybeMatches. 1785 int32_t lastIndexNew = 0; 1786 if (CalledFromJit && maybeMatches && 1787 maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) { 1788 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared()); 1789 if (!CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output)) { 1790 return false; 1791 } 1792 lastIndexNew = (*maybeMatches)[0].limit; 1793 } else { 1794 VectorMatchPairs matches; 1795 RegExpRunStatus status = 1796 ExecuteRegExp(cx, regexp, input, lastIndex, &matches); 1797 if (status == RegExpRunStatus::Error) { 1798 return false; 1799 } 1800 if (status == RegExpRunStatus::Success_NotFound) { 1801 output.setNull(); 1802 lastIndexNew = 0; 1803 } else { 1804 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared()); 1805 if (!CreateRegExpMatchResult(cx, shared, input, matches, output)) { 1806 return false; 1807 } 1808 lastIndexNew = matches[0].limit; 1809 } 1810 } 1811 1812 RegExpFlags flags = regexp->getFlags(); 1813 if (!flags.global() && !flags.sticky()) { 1814 
return true; 1815 } 1816 1817 return SetLastIndex<CalledFromJit>(cx, regexp, lastIndexNew); 1818 } 1819 1820 bool js::RegExpBuiltinExecMatchFromJit(JSContext* cx, 1821 Handle<RegExpObject*> regexp, 1822 HandleString input, 1823 MatchPairs* maybeMatches, 1824 MutableHandleValue output) { 1825 int32_t lastIndex = 0; 1826 if (regexp->isGlobalOrSticky()) { 1827 lastIndex = regexp->getLastIndex().toInt32(); 1828 MOZ_ASSERT(lastIndex >= 0); 1829 if (size_t(lastIndex) > input->length()) { 1830 output.setNull(); 1831 return SetLastIndex<true>(cx, regexp, 0); 1832 } 1833 } 1834 return RegExpBuiltinExecMatchRaw<true>(cx, regexp, input, lastIndex, 1835 maybeMatches, output); 1836 } 1837 1838 template <bool CalledFromJit> 1839 static bool RegExpBuiltinExecTestRaw(JSContext* cx, 1840 Handle<RegExpObject*> regexp, 1841 HandleString input, int32_t lastIndex, 1842 bool* result) { 1843 MOZ_ASSERT(lastIndex >= 0); 1844 MOZ_ASSERT(size_t(lastIndex) <= input->length()); 1845 1846 VectorMatchPairs matches; 1847 RegExpRunStatus status = 1848 ExecuteRegExp(cx, regexp, input, lastIndex, &matches); 1849 if (status == RegExpRunStatus::Error) { 1850 return false; 1851 } 1852 1853 *result = (status == RegExpRunStatus::Success); 1854 1855 RegExpFlags flags = regexp->getFlags(); 1856 if (!flags.global() && !flags.sticky()) { 1857 return true; 1858 } 1859 1860 int32_t lastIndexNew = *result ? 
matches[0].limit : 0; 1861 return SetLastIndex<CalledFromJit>(cx, regexp, lastIndexNew); 1862 } 1863 1864 bool js::RegExpBuiltinExecTestFromJit(JSContext* cx, 1865 Handle<RegExpObject*> regexp, 1866 HandleString input, bool* result) { 1867 int32_t lastIndex = 0; 1868 if (regexp->isGlobalOrSticky()) { 1869 lastIndex = regexp->getLastIndex().toInt32(); 1870 MOZ_ASSERT(lastIndex >= 0); 1871 if (size_t(lastIndex) > input->length()) { 1872 *result = false; 1873 return SetLastIndex<true>(cx, regexp, 0); 1874 } 1875 } 1876 return RegExpBuiltinExecTestRaw<true>(cx, regexp, input, lastIndex, result); 1877 } 1878 1879 using CapturesVector = GCVector<Value, 4>; 1880 1881 struct JSSubString { 1882 JSLinearString* base = nullptr; 1883 size_t offset = 0; 1884 size_t length = 0; 1885 1886 JSSubString() = default; 1887 1888 void initEmpty(JSLinearString* base) { 1889 this->base = base; 1890 offset = length = 0; 1891 } 1892 void init(JSLinearString* base, size_t offset, size_t length) { 1893 this->base = base; 1894 this->offset = offset; 1895 this->length = length; 1896 } 1897 }; 1898 1899 static void GetParen(JSLinearString* matched, const JS::Value& capture, 1900 JSSubString* out) { 1901 if (capture.isUndefined()) { 1902 out->initEmpty(matched); 1903 return; 1904 } 1905 JSLinearString& captureLinear = capture.toString()->asLinear(); 1906 out->init(&captureLinear, 0, captureLinear.length()); 1907 } 1908 1909 template <typename CharT> 1910 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string, 1911 size_t position, size_t tailPos, 1912 Handle<CapturesVector> captures, 1913 Handle<CapturesVector> namedCaptures, 1914 JSLinearString* replacement, 1915 const CharT* replacementBegin, 1916 const CharT* currentDollar, 1917 const CharT* replacementEnd, JSSubString* out, 1918 size_t* skip, uint32_t* currentNamedCapture) { 1919 MOZ_ASSERT(*currentDollar == '$'); 1920 1921 /* If there is only a dollar, bail now. 
// Interprets the `$` sequence that starts at |currentDollar| inside the
// replacement string [replacementBegin, replacementEnd).
//
// When the sequence is a recognized substitution, fills |out| with the text
// to substitute, stores in |skip| the number of replacement characters the
// sequence occupies, and returns true. Returns false, leaving |out| and
// |skip| untouched, when the `$` has to be copied literally.
//
// |currentNamedCapture| is the index of the next entry of |namedCaptures| to
// use. It is advanced each time a `$<name>` sequence is consumed.
template <typename CharT>
static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
                            size_t position, size_t tailPos,
                            Handle<CapturesVector> captures,
                            Handle<CapturesVector> namedCaptures,
                            JSLinearString* replacement,
                            const CharT* replacementBegin,
                            const CharT* currentDollar,
                            const CharT* replacementEnd, JSSubString* out,
                            size_t* skip, uint32_t* currentNamedCapture) {
  MOZ_ASSERT(*currentDollar == '$');

  /* If there is only a dollar, bail now. */
  if (currentDollar + 1 >= replacementEnd) {
    return false;
  }

  // ES 2021 Table 57: Replacement Text Symbol Substitutions
  // https://tc39.es/ecma262/#table-replacement-text-symbol-substitutions
  char16_t c = currentDollar[1];
  if (IsAsciiDigit(c)) {
    /* $n, $nn */
    unsigned num = AsciiDigitToNumber(c);
    if (num > captures.length()) {
      // The result is implementation-defined. Do not substitute.
      return false;
    }

    // Try to extend the capture number with a second digit.
    const CharT* currentChar = currentDollar + 2;
    if (currentChar < replacementEnd) {
      c = *currentChar;
      if (IsAsciiDigit(c)) {
        unsigned tmpNum = 10 * num + AsciiDigitToNumber(c);
        // If num > captures.length(), the result is implementation-defined.
        // Consume next character only if num <= captures.length().
        if (tmpNum <= captures.length()) {
          currentChar++;
          num = tmpNum;
        }
      }
    }

    if (num == 0) {
      // The result is implementation-defined. Do not substitute.
      return false;
    }

    *skip = currentChar - currentDollar;

    MOZ_ASSERT(num <= captures.length());

    // Capture numbers are 1-based, |captures| is 0-based.
    GetParen(matched, captures[num - 1], out);
    return true;
  }

  // '$<': Named Captures
  if (c == '<') {
    // Step 1.
    if (namedCaptures.length() == 0) {
      return false;
    }

    // Step 2.b
    const CharT* nameStart = currentDollar + 2;
    const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);

    // Step 2.c
    if (!nameEnd) {
      return false;
    }

    // Step 2.d
    // We precompute named capture replacements in InitNamedCaptures.
    // They are stored in the order in which we will need them, so here
    // we can just take the next one in the list.
    size_t nameLength = nameEnd - nameStart;
    *skip = nameLength + 3;  // $<...>

    // Steps 2.d.iii-iv
    GetParen(matched, namedCaptures[*currentNamedCapture], out);
    *currentNamedCapture += 1;
    return true;
  }

  // Remaining two-character sequences: $$, $&, $` and $'.
  switch (c) {
    default:
      return false;
    case '$':
      // A literal dollar sign, taken from the replacement string itself.
      out->init(replacement, currentDollar - replacementBegin, 1);
      break;
    case '&':
      out->init(matched, 0, matched->length());
      break;
    case '`':
      out->init(string, 0, position);
      break;
    case '\'':
      if (tailPos >= string->length()) {
        out->initEmpty(matched);
      } else {
        out->init(string, tailPos, string->length() - tailPos);
      }
      break;
  }

  *skip = 2;
  return true;
}
1984 size_t nameLength = nameEnd - nameStart; 1985 *skip = nameLength + 3; // $<...> 1986 1987 // Steps 2.d.iii-iv 1988 GetParen(matched, namedCaptures[*currentNamedCapture], out); 1989 *currentNamedCapture += 1; 1990 return true; 1991 } 1992 1993 switch (c) { 1994 default: 1995 return false; 1996 case '$': 1997 out->init(replacement, currentDollar - replacementBegin, 1); 1998 break; 1999 case '&': 2000 out->init(matched, 0, matched->length()); 2001 break; 2002 case '`': 2003 out->init(string, 0, position); 2004 break; 2005 case '\'': 2006 if (tailPos >= string->length()) { 2007 out->initEmpty(matched); 2008 } else { 2009 out->init(string, tailPos, string->length() - tailPos); 2010 } 2011 break; 2012 } 2013 2014 *skip = 2; 2015 return true; 2016 } 2017 2018 template <typename CharT> 2019 static bool FindReplaceLengthString(JSContext* cx, 2020 Handle<JSLinearString*> matched, 2021 Handle<JSLinearString*> string, 2022 size_t position, size_t tailPos, 2023 Handle<CapturesVector> captures, 2024 Handle<CapturesVector> namedCaptures, 2025 Handle<JSLinearString*> replacement, 2026 size_t firstDollarIndex, size_t* sizep) { 2027 CheckedInt<uint32_t> replen = replacement->length(); 2028 2029 JS::AutoCheckCannotGC nogc; 2030 MOZ_ASSERT(firstDollarIndex < replacement->length()); 2031 const CharT* replacementBegin = replacement->chars<CharT>(nogc); 2032 const CharT* currentDollar = replacementBegin + firstDollarIndex; 2033 const CharT* replacementEnd = replacementBegin + replacement->length(); 2034 uint32_t currentNamedCapture = 0; 2035 do { 2036 JSSubString sub; 2037 size_t skip; 2038 if (InterpretDollar(matched, string, position, tailPos, captures, 2039 namedCaptures, replacement, replacementBegin, 2040 currentDollar, replacementEnd, &sub, &skip, 2041 ¤tNamedCapture)) { 2042 if (sub.length > skip) { 2043 replen += sub.length - skip; 2044 } else { 2045 replen -= skip - sub.length; 2046 } 2047 currentDollar += skip; 2048 } else { 2049 currentDollar++; 2050 } 2051 2052 
currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); 2053 } while (currentDollar); 2054 2055 if (!replen.isValid()) { 2056 ReportAllocationOverflow(cx); 2057 return false; 2058 } 2059 2060 *sizep = replen.value(); 2061 return true; 2062 } 2063 2064 static bool FindReplaceLength(JSContext* cx, Handle<JSLinearString*> matched, 2065 Handle<JSLinearString*> string, size_t position, 2066 size_t tailPos, Handle<CapturesVector> captures, 2067 Handle<CapturesVector> namedCaptures, 2068 Handle<JSLinearString*> replacement, 2069 size_t firstDollarIndex, size_t* sizep) { 2070 return replacement->hasLatin1Chars() 2071 ? FindReplaceLengthString<Latin1Char>( 2072 cx, matched, string, position, tailPos, captures, 2073 namedCaptures, replacement, firstDollarIndex, sizep) 2074 : FindReplaceLengthString<char16_t>( 2075 cx, matched, string, position, tailPos, captures, 2076 namedCaptures, replacement, firstDollarIndex, sizep); 2077 } 2078 2079 /* 2080 * Precondition: |sb| already has necessary growth space reserved (as 2081 * derived from FindReplaceLength), and has been inflated to TwoByte if 2082 * necessary. 2083 */ 2084 template <typename CharT> 2085 static void DoReplace(Handle<JSLinearString*> matched, 2086 Handle<JSLinearString*> string, size_t position, 2087 size_t tailPos, Handle<CapturesVector> captures, 2088 Handle<CapturesVector> namedCaptures, 2089 Handle<JSLinearString*> replacement, 2090 size_t firstDollarIndex, StringBuilder& sb) { 2091 JS::AutoCheckCannotGC nogc; 2092 const CharT* replacementBegin = replacement->chars<CharT>(nogc); 2093 const CharT* currentChar = replacementBegin; 2094 2095 MOZ_ASSERT(firstDollarIndex < replacement->length()); 2096 const CharT* currentDollar = replacementBegin + firstDollarIndex; 2097 const CharT* replacementEnd = replacementBegin + replacement->length(); 2098 uint32_t currentNamedCapture = 0; 2099 do { 2100 /* Move one of the constant portions of the replacement value. 
*/ 2101 size_t len = currentDollar - currentChar; 2102 sb.infallibleAppend(currentChar, len); 2103 currentChar = currentDollar; 2104 2105 JSSubString sub; 2106 size_t skip; 2107 if (InterpretDollar(matched, string, position, tailPos, captures, 2108 namedCaptures, replacement, replacementBegin, 2109 currentDollar, replacementEnd, &sub, &skip, 2110 ¤tNamedCapture)) { 2111 sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length); 2112 currentChar += skip; 2113 currentDollar += skip; 2114 } else { 2115 currentDollar++; 2116 } 2117 2118 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); 2119 } while (currentDollar); 2120 sb.infallibleAppend(currentChar, 2121 replacement->length() - (currentChar - replacementBegin)); 2122 } 2123 2124 /* 2125 * This function finds the list of named captures of the form 2126 * "$<name>" in a replacement string and converts them into jsids, for 2127 * use in InitNamedReplacements. 2128 */ 2129 template <typename CharT> 2130 static bool CollectNames(JSContext* cx, Handle<JSLinearString*> replacement, 2131 size_t firstDollarIndex, 2132 MutableHandle<GCVector<jsid>> names) { 2133 JS::AutoCheckCannotGC nogc; 2134 MOZ_ASSERT(firstDollarIndex < replacement->length()); 2135 2136 const CharT* replacementBegin = replacement->chars<CharT>(nogc); 2137 const CharT* currentDollar = replacementBegin + firstDollarIndex; 2138 const CharT* replacementEnd = replacementBegin + replacement->length(); 2139 2140 // https://tc39.es/ecma262/#table-45, "$<" section 2141 while (currentDollar && currentDollar + 1 < replacementEnd) { 2142 if (currentDollar[1] == '<') { 2143 // Step 2.b 2144 const CharT* nameStart = currentDollar + 2; 2145 const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd); 2146 2147 // Step 2.c 2148 if (!nameEnd) { 2149 return true; 2150 } 2151 2152 // Step 2.d.i 2153 size_t nameLength = nameEnd - nameStart; 2154 JSAtom* atom = AtomizeChars(cx, nameStart, nameLength); 2155 if (!atom || 
!names.append(AtomToId(atom))) { 2156 return false; 2157 } 2158 currentDollar = nameEnd + 1; 2159 } else { 2160 currentDollar += 2; 2161 } 2162 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); 2163 } 2164 return true; 2165 } 2166 2167 /* 2168 * When replacing named captures, the spec requires us to perform 2169 * `Get(match.groups, name)` for each "$<name>". These `Get`s can be 2170 * script-visible; for example, RegExp can be extended with an `exec` 2171 * method that wraps `groups` in a proxy. To make sure that we do the 2172 * right thing, if a regexp has named captures, we find the named 2173 * capture replacements before beginning the actual replacement. 2174 * This guarantees that we will call GetProperty once and only once for 2175 * each "$<name>" in the replacement string, in the correct order. 2176 * 2177 * This function precomputes the results of step 2 of the '$<' case 2178 * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so 2179 * that when we need to access the nth named capture in InterpretDollar, 2180 * we can just use the nth value stored in namedCaptures. 
2181 */ 2182 static bool InitNamedCaptures(JSContext* cx, 2183 Handle<JSLinearString*> replacement, 2184 HandleObject groups, size_t firstDollarIndex, 2185 MutableHandle<CapturesVector> namedCaptures) { 2186 Rooted<GCVector<jsid>> names(cx, cx); 2187 if (replacement->hasLatin1Chars()) { 2188 if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) { 2189 return false; 2190 } 2191 } else { 2192 if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) { 2193 return false; 2194 } 2195 } 2196 2197 // https://tc39.es/ecma262/#table-45, "$<" section 2198 RootedId id(cx); 2199 RootedValue capture(cx); 2200 for (uint32_t i = 0; i < names.length(); i++) { 2201 // Step 2.d.i 2202 id = names[i]; 2203 2204 // Step 2.d.ii 2205 if (!GetProperty(cx, groups, groups, id, &capture)) { 2206 return false; 2207 } 2208 2209 // Step 2.d.iii 2210 if (capture.isUndefined()) { 2211 if (!namedCaptures.append(capture)) { 2212 return false; 2213 } 2214 } else { 2215 // Step 2.d.iv 2216 JSString* str = ToString<CanGC>(cx, capture); 2217 if (!str) { 2218 return false; 2219 } 2220 JSLinearString* linear = str->ensureLinear(cx); 2221 if (!linear) { 2222 return false; 2223 } 2224 if (!namedCaptures.append(StringValue(linear))) { 2225 return false; 2226 } 2227 } 2228 } 2229 2230 return true; 2231 } 2232 2233 static bool NeedTwoBytes(Handle<JSLinearString*> string, 2234 Handle<JSLinearString*> replacement, 2235 Handle<JSLinearString*> matched, 2236 Handle<CapturesVector> captures, 2237 Handle<CapturesVector> namedCaptures) { 2238 if (string->hasTwoByteChars()) { 2239 return true; 2240 } 2241 if (replacement->hasTwoByteChars()) { 2242 return true; 2243 } 2244 if (matched->hasTwoByteChars()) { 2245 return true; 2246 } 2247 2248 for (const Value& capture : captures) { 2249 if (capture.isUndefined()) { 2250 continue; 2251 } 2252 if (capture.toString()->hasTwoByteChars()) { 2253 return true; 2254 } 2255 } 2256 2257 for (const Value& capture : namedCaptures) { 2258 if 
(capture.isUndefined()) { 2259 continue; 2260 } 2261 if (capture.toString()->hasTwoByteChars()) { 2262 return true; 2263 } 2264 } 2265 2266 return false; 2267 } 2268 2269 // ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e 2270 // 2271 // 22.2.7.2 RegExpBuiltinExec ( R, S ) 2272 // https://tc39.es/ecma262/#sec-regexpbuiltinexec 2273 // 2274 // If `forTest` is true, this is called from `RegExp.prototype.test` and we can 2275 // avoid allocating a result object. 2276 bool js::RegExpBuiltinExec(JSContext* cx, Handle<RegExpObject*> regexp, 2277 Handle<JSString*> string, bool forTest, 2278 MutableHandle<Value> rval) { 2279 // Step 2. 2280 uint64_t lastIndex; 2281 if (MOZ_LIKELY(regexp->getLastIndex().isInt32())) { 2282 lastIndex = std::max(regexp->getLastIndex().toInt32(), 0); 2283 } else { 2284 Rooted<Value> lastIndexVal(cx, regexp->getLastIndex()); 2285 if (!ToLength(cx, lastIndexVal, &lastIndex)) { 2286 return false; 2287 } 2288 } 2289 2290 // Steps 3-5. 2291 bool globalOrSticky = regexp->isGlobalOrSticky(); 2292 2293 // Step 7. 2294 if (!globalOrSticky) { 2295 lastIndex = 0; 2296 } else { 2297 // Steps 1, 13.a. 2298 if (lastIndex > string->length()) { 2299 if (!SetLastIndex(cx, regexp, 0)) { 2300 return false; 2301 } 2302 rval.set(forTest ? BooleanValue(false) : NullValue()); 2303 return true; 2304 } 2305 } 2306 2307 MOZ_ASSERT(lastIndex <= string->length()); 2308 static_assert(JSString::MAX_LENGTH <= INT32_MAX, "lastIndex fits in int32_t"); 2309 2310 // Steps 6, 8-35. 
2311 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); 2312 if (!res) { 2313 return false; 2314 } 2315 2316 if (forTest) { 2317 bool result; 2318 if (!RegExpBuiltinExecTestRaw<false>(cx, regexp, string, int32_t(lastIndex), 2319 &result)) { 2320 return false; 2321 } 2322 2323 rval.setBoolean(result); 2324 return true; 2325 } 2326 2327 return RegExpBuiltinExecMatchRaw<false>(cx, regexp, string, 2328 int32_t(lastIndex), nullptr, rval); 2329 } 2330 2331 bool js::IsOptimizableRegExpObject(JSObject* obj, JSContext* cx) { 2332 // Check the shape to ensure this is a plain RegExpObject with this realm's 2333 // RegExp.prototype as prototype and without any extra own properties. 2334 // The fuse check ensures RegExp.prototype is optimizable. 2335 bool optimizable = 2336 obj->shape() == cx->global()->maybeRegExpShapeWithDefaultProto() && 2337 cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact(); 2338 MOZ_ASSERT_IF(optimizable, 2339 obj->is<RegExpObject>() && 2340 obj->as<RegExpObject>().realm() == cx->realm()); 2341 return optimizable; 2342 } 2343 2344 // ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e 2345 // 2346 // 22.2.7.1 RegExpExec ( R, S ) 2347 // https://tc39.es/ecma262/#sec-regexpexec 2348 // 2349 // If `forTest` is true, this is called from `RegExp.prototype.test` and we can 2350 // avoid allocating a result object. 2351 bool js::RegExpExec(JSContext* cx, Handle<JSObject*> regexp, 2352 Handle<JSString*> string, bool forTest, 2353 MutableHandle<Value> rval) { 2354 // Fast path for the case where `regexp` is a regular expression object with 2355 // the builtin `RegExp.prototype.exec` function. 2356 if (MOZ_LIKELY(IsOptimizableRegExpObject(regexp, cx))) { 2357 return RegExpBuiltinExec(cx, regexp.as<RegExpObject>(), string, forTest, 2358 rval); 2359 } 2360 2361 // Step 1. 
2362 Rooted<Value> exec(cx); 2363 Rooted<PropertyKey> execKey(cx, NameToId(cx->names().exec)); 2364 if (!GetProperty(cx, regexp, regexp, execKey, &exec)) { 2365 return false; 2366 } 2367 2368 // Step 2. 2369 // If exec is the original RegExp.prototype.exec, use the same, faster, 2370 // path as for the case where exec isn't callable. 2371 PropertyName* execName = cx->names().RegExp_prototype_Exec; 2372 if (IsSelfHostedFunctionWithName(exec, execName) || !IsCallable(exec)) { 2373 // Steps 3-4. 2374 if (MOZ_LIKELY(regexp->is<RegExpObject>())) { 2375 return RegExpBuiltinExec(cx, regexp.as<RegExpObject>(), string, forTest, 2376 rval); 2377 } 2378 2379 // Throw an exception if it's not a wrapped RegExpObject that we can safely 2380 // unwrap. 2381 if (!regexp->canUnwrapAs<RegExpObject>()) { 2382 Rooted<Value> thisv(cx, ObjectValue(*regexp)); 2383 return ReportIncompatibleSelfHostedMethod( 2384 cx, thisv, IncompatibleContext::RegExpExec); 2385 } 2386 2387 // Call RegExpBuiltinExec in the regular expression's realm. 2388 Rooted<RegExpObject*> unwrapped(cx, ®exp->unwrapAs<RegExpObject>()); 2389 { 2390 AutoRealm ar(cx, unwrapped); 2391 Rooted<JSString*> wrappedString(cx, string); 2392 if (!cx->compartment()->wrap(cx, &wrappedString)) { 2393 return false; 2394 } 2395 if (!RegExpBuiltinExec(cx, unwrapped, wrappedString, forTest, rval)) { 2396 return false; 2397 } 2398 } 2399 return cx->compartment()->wrap(cx, rval); 2400 } 2401 2402 // Step 2.a. 2403 Rooted<Value> thisv(cx, ObjectValue(*regexp)); 2404 FixedInvokeArgs<1> args(cx); 2405 args[0].setString(string); 2406 if (!js::Call(cx, exec, thisv, args, rval, CallReason::CallContent)) { 2407 return false; 2408 } 2409 2410 // Step 2.b. 2411 if (!rval.isObjectOrNull()) { 2412 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 2413 JSMSG_EXEC_NOT_OBJORNULL); 2414 return false; 2415 } 2416 2417 // Step 2.c. 
2418 if (forTest) { 2419 rval.setBoolean(rval.isObject()); 2420 } 2421 return true; 2422 } 2423 2424 bool js::RegExpHasCaptureGroups(JSContext* cx, Handle<RegExpObject*> obj, 2425 Handle<JSString*> input, bool* result) { 2426 // pairCount is only available for compiled regular expressions. 2427 if (!obj->hasShared() || 2428 obj->getShared()->kind() == RegExpShared::Kind::Unparsed) { 2429 Rooted<RegExpShared*> shared(cx, RegExpObject::getShared(cx, obj)); 2430 if (!shared) { 2431 return false; 2432 } 2433 Rooted<JSLinearString*> inputLinear(cx, input->ensureLinear(cx)); 2434 if (!inputLinear) { 2435 return false; 2436 } 2437 if (!RegExpShared::compileIfNecessary(cx, &shared, inputLinear, 2438 RegExpShared::CodeKind::Any)) { 2439 return false; 2440 } 2441 } 2442 2443 MOZ_ASSERT(obj->getShared()->pairCount() >= 1); 2444 2445 *result = obj->getShared()->pairCount() > 1; 2446 return true; 2447 } 2448 2449 /* ES 2021 21.1.3.17.1 */ 2450 // https://tc39.es/ecma262/#sec-getsubstitution 2451 bool js::RegExpGetSubstitution(JSContext* cx, Handle<ArrayObject*> matchResult, 2452 Handle<JSLinearString*> string, size_t position, 2453 Handle<JSLinearString*> replacement, 2454 size_t firstDollarIndex, HandleValue groups, 2455 MutableHandleValue rval) { 2456 MOZ_ASSERT(firstDollarIndex < replacement->length()); 2457 2458 // Step 1 (skipped). 2459 2460 // Step 10 (reordered). 2461 uint32_t matchResultLength = matchResult->length(); 2462 MOZ_ASSERT(matchResultLength > 0); 2463 MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength()); 2464 2465 const Value& matchedValue = matchResult->getDenseElement(0); 2466 Rooted<JSLinearString*> matched(cx, 2467 matchedValue.toString()->ensureLinear(cx)); 2468 if (!matched) { 2469 return false; 2470 } 2471 2472 // Step 2. 2473 size_t matchLength = matched->length(); 2474 2475 // Steps 3-5 (skipped). 2476 2477 // Step 6. 
2478 MOZ_ASSERT(position <= string->length()); 2479 2480 uint32_t nCaptures = matchResultLength - 1; 2481 Rooted<CapturesVector> captures(cx, CapturesVector(cx)); 2482 if (!captures.reserve(nCaptures)) { 2483 return false; 2484 } 2485 2486 // Step 7. 2487 for (uint32_t i = 1; i <= nCaptures; i++) { 2488 const Value& capture = matchResult->getDenseElement(i); 2489 2490 if (capture.isUndefined()) { 2491 captures.infallibleAppend(capture); 2492 continue; 2493 } 2494 2495 JSLinearString* captureLinear = capture.toString()->ensureLinear(cx); 2496 if (!captureLinear) { 2497 return false; 2498 } 2499 captures.infallibleAppend(StringValue(captureLinear)); 2500 } 2501 2502 Rooted<CapturesVector> namedCaptures(cx, cx); 2503 if (groups.isObject()) { 2504 RootedObject groupsObj(cx, &groups.toObject()); 2505 if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex, 2506 &namedCaptures)) { 2507 return false; 2508 } 2509 } else { 2510 MOZ_ASSERT(groups.isUndefined()); 2511 } 2512 2513 // Step 8 (skipped). 2514 2515 // Step 9. 2516 CheckedInt<uint32_t> checkedTailPos(0); 2517 checkedTailPos += position; 2518 checkedTailPos += matchLength; 2519 if (!checkedTailPos.isValid()) { 2520 ReportAllocationOverflow(cx); 2521 return false; 2522 } 2523 uint32_t tailPos = checkedTailPos.value(); 2524 2525 // Step 11. 
2526 size_t reserveLength; 2527 if (!FindReplaceLength(cx, matched, string, position, tailPos, captures, 2528 namedCaptures, replacement, firstDollarIndex, 2529 &reserveLength)) { 2530 return false; 2531 } 2532 2533 JSStringBuilder result(cx); 2534 if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) { 2535 if (!result.ensureTwoByteChars()) { 2536 return false; 2537 } 2538 } 2539 2540 if (!result.reserve(reserveLength)) { 2541 return false; 2542 } 2543 2544 if (replacement->hasLatin1Chars()) { 2545 DoReplace<Latin1Char>(matched, string, position, tailPos, captures, 2546 namedCaptures, replacement, firstDollarIndex, result); 2547 } else { 2548 DoReplace<char16_t>(matched, string, position, tailPos, captures, 2549 namedCaptures, replacement, firstDollarIndex, result); 2550 } 2551 2552 // Step 12. 2553 JSString* resultString = result.finishString(); 2554 if (!resultString) { 2555 return false; 2556 } 2557 2558 rval.setString(resultString); 2559 return true; 2560 } 2561 2562 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) { 2563 CallArgs args = CallArgsFromVp(argc, vp); 2564 MOZ_ASSERT(args.length() == 1); 2565 JSString* str = args[0].toString(); 2566 2567 // Should be handled in different path. 
2568 MOZ_ASSERT(str->length() != 0); 2569 2570 int32_t index = -1; 2571 if (!GetFirstDollarIndexRaw(cx, str, &index)) { 2572 return false; 2573 } 2574 2575 args.rval().setInt32(index); 2576 return true; 2577 } 2578 2579 template <typename TextChar> 2580 static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text, 2581 uint32_t textLen) { 2582 const TextChar* end = text + textLen; 2583 for (const TextChar* c = text; c != end; ++c) { 2584 if (*c == '$') { 2585 return c - text; 2586 } 2587 } 2588 return -1; 2589 } 2590 2591 template <typename StringT> 2592 int32_t js::GetFirstDollarIndexRawFlat(const StringT* text) { 2593 uint32_t len = text->length(); 2594 2595 JS::AutoCheckCannotGC nogc; 2596 if (text->hasLatin1Chars()) { 2597 return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len); 2598 } 2599 2600 return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len); 2601 } 2602 2603 template int32_t js::GetFirstDollarIndexRawFlat<JSLinearString>( 2604 const JSLinearString* text); 2605 template int32_t js::GetFirstDollarIndexRawFlat<JSOffThreadAtom>( 2606 const JSOffThreadAtom* text); 2607 2608 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) { 2609 JSLinearString* text = str->ensureLinear(cx); 2610 if (!text) { 2611 return false; 2612 } 2613 2614 *index = GetFirstDollarIndexRawFlat(text); 2615 return true; 2616 } 2617 2618 bool js::IsRegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) { 2619 // This can only be called from self-hosted code. 2620 CallArgs args = CallArgsFromVp(argc, vp); 2621 MOZ_ASSERT(args.length() == 0); 2622 2623 bool optimizable = 2624 cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact(); 2625 args.rval().setBoolean(optimizable); 2626 return true; 2627 } 2628 2629 bool js::IsOptimizableRegExpObject(JSContext* cx, unsigned argc, Value* vp) { 2630 // This can only be called from self-hosted code. 
2631 CallArgs args = CallArgsFromVp(argc, vp); 2632 MOZ_ASSERT(args.length() == 1); 2633 MOZ_ASSERT(args[0].isObject()); 2634 2635 JSObject* obj = &args[0].toObject(); 2636 2637 bool optimizable = IsOptimizableRegExpObject(obj, cx); 2638 args.rval().setBoolean(optimizable); 2639 return true; 2640 } 2641 2642 /* 2643 * Pattern match the script to check if it is is indexing into a particular 2644 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in 2645 * such cases, which are used by javascript packers (particularly the popular 2646 * Dean Edwards packer) to efficiently encode large scripts. We only handle the 2647 * code patterns generated by such packers here. 2648 */ 2649 bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc, 2650 Value* vp) { 2651 // This can only be called from self-hosted code. 2652 CallArgs args = CallArgsFromVp(argc, vp); 2653 MOZ_ASSERT(args.length() == 1); 2654 2655 JSObject& lambda = args[0].toObject(); 2656 args.rval().setUndefined(); 2657 2658 if (!lambda.is<JSFunction>()) { 2659 return true; 2660 } 2661 2662 RootedFunction fun(cx, &lambda.as<JSFunction>()); 2663 if (!fun->isInterpreted() || fun->isClassConstructor()) { 2664 return true; 2665 } 2666 2667 JSScript* script = JSFunction::getOrCreateScript(cx, fun); 2668 if (!script) { 2669 return false; 2670 } 2671 2672 jsbytecode* pc = script->code(); 2673 2674 /* 2675 * JSOp::GetAliasedVar tells us exactly where to find the base object 'b'. 2676 * Rule out the (unlikely) possibility of a function with environment 2677 * objects since it would make our environment walk off. 
2678 */ 2679 if (JSOp(*pc) != JSOp::GetAliasedVar || fun->needsSomeEnvironmentObject()) { 2680 return true; 2681 } 2682 EnvironmentCoordinate ec(pc); 2683 EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>(); 2684 for (unsigned i = 0; i < ec.hops(); ++i) { 2685 env = &env->enclosingEnvironment().as<EnvironmentObject>(); 2686 } 2687 Value b = env->aliasedBinding(ec); 2688 pc += JSOpLength_GetAliasedVar; 2689 2690 /* Look for 'a' to be the lambda's first argument. */ 2691 if (JSOp(*pc) != JSOp::GetArg || GET_ARGNO(pc) != 0) { 2692 return true; 2693 } 2694 pc += JSOpLength_GetArg; 2695 2696 /* 'b[a]' */ 2697 if (JSOp(*pc) != JSOp::GetElem) { 2698 return true; 2699 } 2700 pc += JSOpLength_GetElem; 2701 2702 /* 'return b[a]' */ 2703 if (JSOp(*pc) != JSOp::Return) { 2704 return true; 2705 } 2706 2707 /* 'b' must behave like a normal object. */ 2708 if (!b.isObject()) { 2709 return true; 2710 } 2711 2712 JSObject& bobj = b.toObject(); 2713 const JSClass* clasp = bobj.getClass(); 2714 if (!clasp->isNativeObject() || clasp->getOpsLookupProperty() || 2715 clasp->getOpsGetProperty()) { 2716 return true; 2717 } 2718 2719 args.rval().setObject(bobj); 2720 return true; 2721 } 2722 2723 /* 2724 * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda. 2725 * It returns the property value only if the property is data property and the 2726 * property value is a string. Otherwise it returns undefined. 2727 */ 2728 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc, 2729 Value* vp) { 2730 CallArgs args = CallArgsFromVp(argc, vp); 2731 MOZ_ASSERT(args.length() == 2); 2732 2733 JSObject* obj = &args[0].toObject(); 2734 if (!obj->is<NativeObject>()) { 2735 // The object is already checked to be native in GetElemBaseForLambda, 2736 // but it can be swapped to another class that is non-native. 2737 // Return undefined to mark failure to get the property. 
2738 args.rval().setUndefined(); 2739 return true; 2740 } 2741 2742 // No need to root |obj| because |AtomizeString| can't GC. 2743 JS::AutoCheckCannotGC nogc; 2744 2745 JSAtom* atom = AtomizeString(cx, args[1].toString()); 2746 if (!atom) { 2747 return false; 2748 } 2749 2750 Value v; 2751 if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString()) { 2752 args.rval().set(v); 2753 } else { 2754 args.rval().setUndefined(); 2755 } 2756 2757 return true; 2758 }