tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RegExp.cpp (88572B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "builtin/RegExp.h"
      8 
      9 #include "mozilla/Casting.h"
     10 #include "mozilla/CheckedInt.h"
     11 #include "mozilla/TextUtils.h"
     12 
     13 #include "jsapi.h"
     14 
     15 #include "frontend/FrontendContext.h"  // AutoReportFrontendContext
     16 #include "frontend/TokenStream.h"
     17 #include "irregexp/RegExpAPI.h"
     18 #include "jit/InlinableNatives.h"
     19 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_NEWREGEXP_FLAGGED
     20 #include "js/PropertySpec.h"
     21 #include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
     22 #include "util/StringBuilder.h"
     23 #include "vm/EqualityOperations.h"
     24 #include "vm/Interpreter.h"
     25 #include "vm/JSContext.h"
     26 #include "vm/RegExpObject.h"
     27 #include "vm/RegExpStatics.h"
     28 #include "vm/SelfHosting.h"
     29 
     30 #include "vm/EnvironmentObject-inl.h"
     31 #include "vm/GeckoProfiler-inl.h"
     32 #include "vm/JSObject-inl.h"
     33 #include "vm/ObjectOperations-inl.h"
     34 #include "vm/PlainObject-inl.h"
     35 
     36 using namespace js;
     37 
     38 using mozilla::AssertedCast;
     39 using mozilla::CheckedInt;
     40 using mozilla::IsAsciiDigit;
     41 
     42 using JS::CompileOptions;
     43 using JS::RegExpFlag;
     44 using JS::RegExpFlags;
     45 
     46 // Allocate an object for the |.groups| or |.indices.groups| property
     47 // of a regexp match result.
     48 static PlainObject* CreateGroupsObject(JSContext* cx,
     49                                       Handle<PlainObject*> groupsTemplate) {
     50  if (groupsTemplate->inDictionaryMode()) {
     51    return NewPlainObjectWithProto(cx, nullptr);
     52  }
     53 
     54  // The groups template object is stored in RegExpShared, which is shared
     55  // across compartments and realms. So watch out for the case when the template
     56  // object's realm is different from the current realm.
     57  if (cx->realm() != groupsTemplate->realm()) {
     58    return PlainObject::createWithTemplateFromDifferentRealm(cx,
     59                                                             groupsTemplate);
     60  }
     61 
     62  return PlainObject::createWithTemplate(cx, groupsTemplate);
     63 }
     64 
     65 static inline void getValueAndIndex(HandleRegExpShared re, uint32_t i,
     66                                    Handle<ArrayObject*> arr,
     67                                    MutableHandleValue val,
     68                                    uint32_t& valueIndex) {
     69  if (re->numNamedCaptures() == re->numDistinctNamedCaptures()) {
     70    valueIndex = re->getNamedCaptureIndex(i);
     71    val.set(arr->getDenseElement(valueIndex));
     72  } else {
     73    mozilla::Span<uint32_t> indicesSlice = re->getNamedCaptureIndices(i);
     74    MOZ_ASSERT(!indicesSlice.IsEmpty());
     75    valueIndex = indicesSlice[0];
     76    for (uint32_t index : indicesSlice) {
     77      val.set(arr->getDenseElement(index));
     78      if (!val.isUndefined()) {
     79        valueIndex = index;
     80        break;
     81      }
     82    }
     83  }
     84 }
     85 
     86 /*
     87 * Implements RegExpBuiltinExec: Steps 18-35
     88 * https://tc39.es/ecma262/#sec-regexpbuiltinexec
     89 */
     90 bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
     91                                 HandleString input, const MatchPairs& matches,
     92                                 MutableHandleValue rval) {
     93  MOZ_ASSERT(re);
     94  MOZ_ASSERT(input);
     95 
     96  /*
     97   * Create the (slow) result array for a match.
     98   *
     99   * Array contents:
    100   *  0:              matched string
    101   *  1..pairCount-1: paren matches
    102   *  input:          input string
    103   *  index:          start index for the match
    104   *  groups:         named capture groups for the match
    105   *  indices:        capture indices for the match, if required
    106   */
    107 
    108  bool hasIndices = re->hasIndices();
    109 
    110  // Get the shape for the output object.
    111  RegExpRealm::ResultShapeKind kind =
    112      hasIndices ? RegExpRealm::ResultShapeKind::WithIndices
    113                 : RegExpRealm::ResultShapeKind::Normal;
    114  Rooted<SharedShape*> shape(
    115      cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(cx, kind));
    116  if (!shape) {
    117    return false;
    118  }
    119 
    120  // Steps 18-19
    121  size_t numPairs = matches.length();
    122  MOZ_ASSERT(numPairs > 0);
    123 
    124  // Steps 20-21: Allocate the match result object.
    125  Rooted<ArrayObject*> arr(
    126      cx, NewDenseFullyAllocatedArrayWithShape(cx, numPairs, shape));
    127  if (!arr) {
    128    return false;
    129  }
    130 
    131  // Steps 28-29 and 33 a-d: Initialize the elements of the match result.
    132  // Store a Value for each match pair.
    133  for (size_t i = 0; i < numPairs; i++) {
    134    const MatchPair& pair = matches[i];
    135 
    136    if (pair.isUndefined()) {
    137      MOZ_ASSERT(i != 0);  // Since we had a match, first pair must be present.
    138      arr->setDenseInitializedLength(i + 1);
    139      arr->initDenseElement(i, UndefinedValue());
    140    } else {
    141      JSLinearString* str =
    142          NewDependentString(cx, input, pair.start, pair.length());
    143      if (!str) {
    144        return false;
    145      }
    146      arr->setDenseInitializedLength(i + 1);
    147      arr->initDenseElement(i, StringValue(str));
    148    }
    149  }
    150 
    151  // Step 34a (reordered): Allocate and initialize the indices object if needed.
    152  // This is an inlined implementation of MakeIndicesArray:
    153  // https://tc39.es/ecma262/#sec-makeindicesarray
    154  Rooted<ArrayObject*> indices(cx);
    155  Rooted<PlainObject*> indicesGroups(cx);
    156  if (hasIndices) {
    157    // MakeIndicesArray: step 8
    158    Rooted<SharedShape*> indicesShape(
    159        cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(
    160                cx, RegExpRealm::ResultShapeKind::Indices));
    161    if (!indicesShape) {
    162      return false;
    163    }
    164    indices = NewDenseFullyAllocatedArrayWithShape(cx, numPairs, indicesShape);
    165    if (!indices) {
    166      return false;
    167    }
    168 
    169    // MakeIndicesArray: steps 10-12
    170    if (re->numNamedCaptures() > 0) {
    171      Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    172      indicesGroups = CreateGroupsObject(cx, groupsTemplate);
    173      if (!indicesGroups) {
    174        return false;
    175      }
    176      indices->initSlot(RegExpRealm::IndicesGroupsSlot,
    177                        ObjectValue(*indicesGroups));
    178    }
    179 
    180    // MakeIndicesArray: step 13 a-d. (Step 13.e is implemented below.)
    181    for (size_t i = 0; i < numPairs; i++) {
    182      const MatchPair& pair = matches[i];
    183 
    184      if (pair.isUndefined()) {
    185        // Since we had a match, first pair must be present.
    186        MOZ_ASSERT(i != 0);
    187        indices->setDenseInitializedLength(i + 1);
    188        indices->initDenseElement(i, UndefinedValue());
    189      } else {
    190        ArrayObject* indexPair = NewDenseFullyAllocatedArray(cx, 2);
    191        if (!indexPair) {
    192          return false;
    193        }
    194        indexPair->setDenseInitializedLength(2);
    195        indexPair->initDenseElement(0, Int32Value(pair.start));
    196        indexPair->initDenseElement(1, Int32Value(pair.limit));
    197 
    198        indices->setDenseInitializedLength(i + 1);
    199        indices->initDenseElement(i, ObjectValue(*indexPair));
    200      }
    201    }
    202  }
    203 
    204  // Steps 30-31 (reordered): Allocate the groups object (if needed).
    205  Rooted<PlainObject*> groups(cx);
    206  bool groupsInDictionaryMode = false;
    207  if (re->numNamedCaptures() > 0) {
    208    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    209    groupsInDictionaryMode = groupsTemplate->inDictionaryMode();
    210    groups = CreateGroupsObject(cx, groupsTemplate);
    211    if (!groups) {
    212      return false;
    213    }
    214  }
    215 
    216  // Step 33 e-f: Initialize the properties of |groups| and |indices.groups|.
    217  // The groups template object stores the names of the named captures
    218  // in the the order in which they are defined. The named capture
    219  // indices vector stores the corresponding capture indices. In
    220  // dictionary mode, we have to define the properties explicitly. If
    221  // we are not in dictionary mode, we simply fill in the slots with
    222  // the correct values.
    223  if (groupsInDictionaryMode) {
    224    RootedIdVector keys(cx);
    225    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    226    if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
    227      return false;
    228    }
    229    MOZ_ASSERT(keys.length() == re->numDistinctNamedCaptures());
    230    RootedId key(cx);
    231    RootedValue val(cx);
    232    uint32_t valueIndex;
    233    for (uint32_t i = 0; i < keys.length(); i++) {
    234      key = keys[i];
    235      getValueAndIndex(re, i, arr, &val, valueIndex);
    236      if (!NativeDefineDataProperty(cx, groups, key, val, JSPROP_ENUMERATE)) {
    237        return false;
    238      }
    239 
    240      // MakeIndicesArray: Step 13.e (reordered)
    241      if (hasIndices) {
    242        val = indices->getDenseElement(valueIndex);
    243        if (!NativeDefineDataProperty(cx, indicesGroups, key, val,
    244                                      JSPROP_ENUMERATE)) {
    245          return false;
    246        }
    247      }
    248    }
    249  } else {
    250    RootedValue val(cx);
    251    uint32_t valueIndex;
    252 
    253    for (uint32_t i = 0; i < re->numDistinctNamedCaptures(); i++) {
    254      getValueAndIndex(re, i, arr, &val, valueIndex);
    255      groups->initSlot(i, val);
    256 
    257      // MakeIndicesArray: Step 13.e (reordered)
    258      if (hasIndices) {
    259        indicesGroups->initSlot(i, indices->getDenseElement(valueIndex));
    260      }
    261    }
    262  }
    263 
    264  // Step 22 (reordered).
    265  // Set the |index| property.
    266  arr->initSlot(RegExpRealm::MatchResultObjectIndexSlot,
    267                Int32Value(matches[0].start));
    268 
    269  // Step 23 (reordered).
    270  // Set the |input| property.
    271  arr->initSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));
    272 
    273  // Step 32 (reordered)
    274  // Set the |groups| property.
    275  if (groups) {
    276    arr->initSlot(RegExpRealm::MatchResultObjectGroupsSlot,
    277                  ObjectValue(*groups));
    278  }
    279 
    280  // Step 34b
    281  // Set the |indices| property.
    282  if (re->hasIndices()) {
    283    arr->initSlot(RegExpRealm::MatchResultObjectIndicesSlot,
    284                  ObjectValue(*indices));
    285  }
    286 
    287 #ifdef DEBUG
    288  RootedValue test(cx);
    289  RootedId id(cx, NameToId(cx->names().index));
    290  if (!NativeGetProperty(cx, arr, id, &test)) {
    291    return false;
    292  }
    293  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
    294  id = NameToId(cx->names().input);
    295  if (!NativeGetProperty(cx, arr, id, &test)) {
    296    return false;
    297  }
    298  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
    299 #endif
    300 
    301  // Step 35.
    302  rval.setObject(*arr);
    303  return true;
    304 }
    305 
    306 static int32_t CreateRegExpSearchResult(JSContext* cx,
    307                                        const MatchPairs& matches) {
    308  MOZ_ASSERT(matches[0].start >= 0);
    309  MOZ_ASSERT(matches[0].limit >= 0);
    310 
    311  MOZ_ASSERT(cx->regExpSearcherLastLimit == RegExpSearcherLastLimitSentinel);
    312 
    313 #ifdef DEBUG
    314  static_assert(JSString::MAX_LENGTH < RegExpSearcherLastLimitSentinel);
    315  MOZ_ASSERT(uint32_t(matches[0].limit) < RegExpSearcherLastLimitSentinel);
    316 #endif
    317 
    318  cx->regExpSearcherLastLimit = matches[0].limit;
    319  return matches[0].start;
    320 }
    321 /*
    322 * https://github.com/tc39/proposal-regexp-legacy-features/blob/master/README.md#regexpbuiltinexec--r-s-
    323 *
    324 */
    325 
    326 static bool ShouldUpdateRegExpStatics(JSContext* cx,
    327                                      Handle<RegExpObject*> regexp) {
    328  if (!JS::Prefs::experimental_legacy_regexp()) {
    329    return true;
    330  }
    331  // Step 5. Let thisRealm be the current Realm Record.
    332  JS::Realm* thisRealm = cx->realm();
    333  // Step 6. Let rRealm be the value of R's [[Realm]] internal slot.
    334  JS::Realm* rRealm = regexp->realm();
    335 
    336  // Step 7. If SameValue(thisRealm, rRealm) is true, then
    337  if (thisRealm == rRealm) {
    338    return regexp->legacyFeaturesEnabled();
    339  }
    340  return false;
    341 }
    342 
    343 /*
    344 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
    345 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
    346 */
    347 static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
    348                                         MutableHandleRegExpShared re,
    349                                         Handle<JSLinearString*> input,
    350                                         size_t searchIndex,
    351                                         VectorMatchPairs* matches,
    352                                         Handle<RegExpObject*> regexp) {
    353  RegExpRunStatus status =
    354      RegExpShared::execute(cx, re, input, searchIndex, matches);
    355 
    356  /* Out of spec: Update RegExpStatics. */
    357  if (status == RegExpRunStatus::Success && res) {
    358    if (ShouldUpdateRegExpStatics(cx, regexp)) {
    359      if (!res->updateFromMatchPairs(cx, input, *matches)) {
    360        return RegExpRunStatus::Error;
    361      }
    362    } else {
    363      res->invalidate();
    364    }
    365  }
    366  return status;
    367 }
    368 
    369 /* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
    370 bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
    371                             Handle<RegExpObject*> reobj,
    372                             Handle<JSLinearString*> input, size_t* lastIndex,
    373                             bool test, MutableHandleValue rval) {
    374  cx->check(reobj, input);
    375 
    376  RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
    377  if (!shared) {
    378    return false;
    379  }
    380 
    381  VectorMatchPairs matches;
    382 
    383  RegExpRunStatus status =
    384      ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, reobj);
    385  if (status == RegExpRunStatus::Error) {
    386    return false;
    387  }
    388 
    389  if (status == RegExpRunStatus::Success_NotFound) {
    390    /* ExecuteRegExp() previously returned an array or null. */
    391    rval.setNull();
    392    return true;
    393  }
    394 
    395  *lastIndex = matches[0].limit;
    396 
    397  if (test) {
    398    /* Forbid an array, as an optimization. */
    399    rval.setBoolean(true);
    400    return true;
    401  }
    402 
    403  return CreateRegExpMatchResult(cx, shared, input, matches, rval);
    404 }
    405 
    406 static bool CheckPatternSyntaxSlow(JSContext* cx, Handle<JSAtom*> pattern,
    407                                   RegExpFlags flags) {
    408  LifoAllocScope allocScope(&cx->tempLifoAlloc());
    409  AutoReportFrontendContext fc(cx);
    410  CompileOptions options(cx);
    411  frontend::DummyTokenStream dummyTokenStream(&fc, options);
    412  return irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(),
    413                                      dummyTokenStream, pattern, flags);
    414 }
    415 
    416 static RegExpShared* CheckPatternSyntax(JSContext* cx, Handle<JSAtom*> pattern,
    417                                        RegExpFlags flags) {
    418  // If we already have a RegExpShared for this pattern/flags, we can
    419  // avoid the much slower CheckPatternSyntaxSlow call.
    420 
    421  RootedRegExpShared shared(cx, cx->zone()->regExps().maybeGet(pattern, flags));
    422  if (shared) {
    423 #ifdef DEBUG
    424    // Assert the pattern is valid.
    425    if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
    426      MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
    427      return nullptr;
    428    }
    429 #endif
    430    return shared;
    431  }
    432 
    433  if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
    434    return nullptr;
    435  }
    436 
    437  // Allocate and return a new RegExpShared so we will hit the fast path
    438  // next time.
    439  return cx->zone()->regExps().get(cx, pattern, flags);
    440 }
    441 
    442 /*
    443 * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
    444 *
    445 * Steps 14-15 set |obj|'s "lastIndex" property to zero.  Some of
    446 * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
    447 * in these cases zeroing "lastIndex" is infallible.  But others have a RegExp
    448 * whose "lastIndex" property might have been made non-writable: here, zeroing
    449 * "lastIndex" can fail.  We efficiently solve this problem by completely
    450 * removing "lastIndex" zeroing from the provided function.
    451 *
    452 * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
    453 *
    454 * Because this function only ever returns a user-provided |obj| in the spec,
    455 * we omit it and just return the usual success/failure.
    456 */
    457 static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
    458                                              Handle<RegExpObject*> obj,
    459                                              HandleValue patternValue,
    460                                              HandleValue flagsValue) {
    461  Rooted<JSAtom*> pattern(cx);
    462  if (patternValue.isUndefined()) {
    463    /* Step 1. */
    464    pattern = cx->names().empty_;
    465  } else {
    466    /* Step 2. */
    467    pattern = ToAtom<CanGC>(cx, patternValue);
    468    if (!pattern) {
    469      return false;
    470    }
    471  }
    472 
    473  /* Step 3. */
    474  RegExpFlags flags = RegExpFlag::NoFlags;
    475  if (!flagsValue.isUndefined()) {
    476    /* Step 4. */
    477    RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
    478    if (!flagStr) {
    479      return false;
    480    }
    481 
    482    /* Step 5. */
    483    if (!ParseRegExpFlags(cx, flagStr, &flags)) {
    484      return false;
    485    }
    486  }
    487 
    488  /* Steps 7-8. */
    489  RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
    490  if (!shared) {
    491    return false;
    492  }
    493 
    494  /* Steps 9-12. */
    495  obj->initIgnoringLastIndex(pattern, flags);
    496 
    497  obj->setShared(shared);
    498 
    499  return true;
    500 }
    501 
    502 /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
    503 bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
    504                      HandleValue flagsValue, MutableHandleValue rval,
    505                      HandleObject newTarget) {
    506  /* Step 1. */
    507  Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, newTarget));
    508  if (!regexp) {
    509    return false;
    510  }
    511 
    512  /* Step 2. */
    513  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue,
    514                                         flagsValue)) {
    515    return false;
    516  }
    517  regexp->zeroLastIndex(cx);
    518 
    519  rval.setObject(*regexp);
    520  return true;
    521 }
    522 
    523 MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
    524  return v.isObject() && v.toObject().is<RegExpObject>();
    525 }
    526 
    527 /* ES6 draft rc3 7.2.8. */
    528 bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
    529  /* Step 1. */
    530  if (!value.isObject()) {
    531    *result = false;
    532    return true;
    533  }
    534  RootedObject obj(cx, &value.toObject());
    535 
    536  /* Steps 2-3. */
    537  RootedValue isRegExp(cx);
    538  RootedId matchId(cx, PropertyKey::Symbol(cx->wellKnownSymbols().match));
    539  if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) {
    540    return false;
    541  }
    542 
    543  /* Step 4. */
    544  if (!isRegExp.isUndefined()) {
    545    *result = ToBoolean(isRegExp);
    546    return true;
    547  }
    548 
    549  /* Steps 5-6. */
    550  ESClass cls;
    551  if (!GetClassOfValue(cx, value, &cls)) {
    552    return false;
    553  }
    554 
    555  *result = cls == ESClass::RegExp;
    556  return true;
    557 }
    558 
    559 // The "lastIndex" property is non-configurable, but it can be made
    560 // non-writable. If CalledFromJit is true, we have emitted guards to ensure it's
    561 // writable.
    562 template <bool CalledFromJit = false>
    563 static bool SetLastIndex(JSContext* cx, Handle<RegExpObject*> regexp,
    564                         int32_t lastIndex) {
    565  MOZ_ASSERT(lastIndex >= 0);
    566 
    567  if (CalledFromJit || MOZ_LIKELY(RegExpObject::isInitialShape(regexp)) ||
    568      regexp->lookupPure(cx->names().lastIndex)->writable()) {
    569    regexp->setLastIndex(cx, lastIndex);
    570    return true;
    571  }
    572 
    573  Rooted<Value> val(cx, Int32Value(lastIndex));
    574  return SetProperty(cx, regexp, cx->names().lastIndex, val);
    575 }
    576 
    577 /*
    578 * RegExp.prototype.compile ( pattern, flags )
    579 * https://github.com/tc39/proposal-regexp-legacy-features?tab=readme-ov-file#regexpprototypecompile--pattern-flags-
    580 * ES6 B.2.5.1.
    581 */
    582 MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
    583                                           const CallArgs& args) {
    584  MOZ_ASSERT(IsRegExpObject(args.thisv()));
    585 
    586  Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
    587 
    588  // Step 7. If Type(pattern) is Object and pattern has a [[RegExpMatcher]]
    589  // internal slot, then
    590  RootedValue patternValue(cx, args.get(0));
    591  ESClass cls;
    592  if (!GetClassOfValue(cx, patternValue, &cls)) {
    593    return false;
    594  }
    595  if (cls == ESClass::RegExp) {
    596    // Step 7.i. If flags is not undefined, throw a TypeError exception.
    597    if (args.hasDefined(1)) {
    598      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    599                                JSMSG_NEWREGEXP_FLAGGED);
    600      return false;
    601    }
    602 
    603    // Beware!  |patternObj| might be a proxy into another compartment, so
    604    // don't assume |patternObj.is<RegExpObject>()|.  For the same reason,
    605    // don't reuse the RegExpShared below.
    606    RootedObject patternObj(cx, &patternValue.toObject());
    607 
    608    Rooted<JSAtom*> sourceAtom(cx);
    609    RegExpFlags flags = RegExpFlag::NoFlags;
    610    {
    611      // Step 7.ii. Let P be the value of pattern’s [[OriginalSource]] internal
    612      // slot.
    613      RegExpShared* shared = RegExpToShared(cx, patternObj);
    614      if (!shared) {
    615        return false;
    616      }
    617 
    618      sourceAtom = shared->getSource();
    619      flags = shared->getFlags();
    620    }
    621 
    622    // Step 9, minus lastIndex zeroing.
    623    regexp->initIgnoringLastIndex(sourceAtom, flags);
    624  } else {
    625    // Step 8.
    626    RootedValue P(cx, patternValue);
    627    RootedValue F(cx, args.get(1));
    628 
    629    // Step 9, minus lastIndex zeroing.
    630    if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
    631      return false;
    632    }
    633  }
    634 
    635  // The final niggling bit of step 8.
    636  //
    637  // |regexp| is user-exposed, so its "lastIndex" property might be
    638  // non-writable.
    639  if (!SetLastIndex(cx, regexp, 0)) {
    640    return false;
    641  }
    642 
    643  args.rval().setObject(*regexp);
    644  return true;
    645 }
    646 
    647 static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
    648  CallArgs args = CallArgsFromVp(argc, vp);
    649 
    650  if (JS::Prefs::experimental_legacy_regexp() && args.thisv().isObject()) {
    651    RootedObject thisObj(cx, &args.thisv().toObject());
    652 
    653    JSObject* unwrapped = js::CheckedUnwrapStatic(thisObj);
    654 
    655    if (unwrapped && unwrapped->is<RegExpObject>()) {
    656      // Step 3. Let thisRealm be the current Realm Record.
    657      JS::Realm* thisRealm = cx->realm();
    658 
    659      // Step 4. Let oRealm be the value of O’s [[Realm]] internal slot.
    660      RegExpObject* regexp = &unwrapped->as<RegExpObject>();
    661 
    662      JS::Realm* oRealm = regexp->realm();
    663 
    664      // Step 5. If SameValue(thisRealm, oRealm) is false, throw a TypeError
    665      // exception.
    666      if (thisRealm != oRealm) {
    667        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    668                                  JSMSG_REGEXP_CROSS_REALM);
    669        return false;
    670      }
    671 
    672      // Step 6. If the value of R’s [[LegacyFeaturesEnabled]] internal slot is
    673      // false, throw a TypeError exception.
    674      if (!regexp->legacyFeaturesEnabled()) {
    675        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    676                                  JSMSG_REGEXP_LEGACY_FEATURES_DISABLED);
    677        return false;
    678      }
    679    }
    680  }
    681 
    682  /* Steps 1-2. */
    683  return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
    684 }
    685 
    686 /*
    687 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
    688 */
    689 bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
    690  AutoJSConstructorProfilerEntry pseudoFrame(cx, "RegExp");
    691  CallArgs args = CallArgsFromVp(argc, vp);
    692 
    693  RootedObject newTarget(cx);
    694 
    695  // Steps 1.
    696  bool patternIsRegExp;
    697  if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) {
    698    return false;
    699  }
    700 
    701  if (!args.isConstructing()) {
    702    // Step 3.b.
    703    if (patternIsRegExp && !args.hasDefined(1)) {
    704      RootedObject patternObj(cx, &args[0].toObject());
    705 
    706      // Step 3.b.i.
    707      RootedValue patternConstructor(cx);
    708      if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
    709                       &patternConstructor)) {
    710        return false;
    711      }
    712 
    713      // Step 3.b.ii.
    714      if (patternConstructor.isObject() &&
    715          patternConstructor.toObject() == args.callee()) {
    716        args.rval().set(args[0]);
    717        return true;
    718      }
    719    }
    720  } else {
    721    newTarget = &args.newTarget().toObject();
    722  }
    723 
    724  RootedValue patternValue(cx, args.get(0));
    725 
    726  // Step 4.
    727  ESClass cls;
    728  if (!GetClassOfValue(cx, patternValue, &cls)) {
    729    return false;
    730  }
    731  if (cls == ESClass::RegExp) {
    732    // Beware!  |patternObj| might be a proxy into another compartment, so
    733    // don't assume |patternObj.is<RegExpObject>()|.
    734    RootedObject patternObj(cx, &patternValue.toObject());
    735 
    736    Rooted<JSAtom*> sourceAtom(cx);
    737    RegExpFlags flags;
    738    RootedRegExpShared shared(cx);
    739    {
    740      // Step 4.a.
    741      shared = RegExpToShared(cx, patternObj);
    742      if (!shared) {
    743        return false;
    744      }
    745      sourceAtom = shared->getSource();
    746 
    747      // Step 4.b.
    748      // Get original flags in all cases, to compare with passed flags.
    749      flags = shared->getFlags();
    750 
    751      // If the RegExpShared is in another Zone, don't reuse it.
    752      if (cx->zone() != shared->zone()) {
    753        shared = nullptr;
    754      }
    755    }
    756 
    757    // Step 7.
    758    RootedObject proto(cx);
    759    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
    760      return false;
    761    }
    762 
    763    Rooted<RegExpObject*> regexp(
    764        cx, RegExpAlloc(cx, GenericObject, proto, newTarget));
    765    if (!regexp) {
    766      return false;
    767    }
    768 
    769    // Step 8.
    770    if (args.hasDefined(1)) {
    771      // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
    772      RegExpFlags flagsArg = RegExpFlag::NoFlags;
    773      RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
    774      if (!flagStr) {
    775        return false;
    776      }
    777      if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) {
    778        return false;
    779      }
    780 
    781      // Don't reuse the RegExpShared if we have different flags.
    782      if (flags != flagsArg) {
    783        shared = nullptr;
    784      }
    785 
    786      if (!flags.unicode() && flagsArg.unicode()) {
    787        // Have to check syntax again when adding 'u' flag.
    788 
    789        // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
    790        // 21.2.3.2.2 step 7.
    791        shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
    792        if (!shared) {
    793          return false;
    794        }
    795      }
    796      flags = flagsArg;
    797    }
    798 
    799    regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
    800 
    801    if (shared) {
    802      regexp->setShared(shared);
    803    }
    804 
    805    args.rval().setObject(*regexp);
    806    return true;
    807  }
    808 
    809  RootedValue P(cx);
    810  RootedValue F(cx);
    811 
    812  // Step 5.
    813  if (patternIsRegExp) {
    814    RootedObject patternObj(cx, &patternValue.toObject());
    815 
    816    // Step 5.a.
    817    if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) {
    818      return false;
    819    }
    820 
    821    // Step 5.b.
    822    F = args.get(1);
    823    if (F.isUndefined()) {
    824      if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) {
    825        return false;
    826      }
    827    }
    828  } else {
    829    // Steps 6.a-b.
    830    P = patternValue;
    831    F = args.get(1);
    832  }
    833 
    834  // Step 7.
    835  RootedObject proto(cx);
    836  if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
    837    return false;
    838  }
    839 
    840  Rooted<RegExpObject*> regexp(
    841      cx, RegExpAlloc(cx, GenericObject, proto, newTarget));
    842  if (!regexp) {
    843    return false;
    844  }
    845 
    846  // Step 8.
    847  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
    848    return false;
    849  }
    850  regexp->zeroLastIndex(cx);
    851 
    852  args.rval().setObject(*regexp);
    853  return true;
    854 }
    855 
    856 /*
    857 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
    858 * steps 4, 7-8.
    859 */
    860 bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
    861  CallArgs args = CallArgsFromVp(argc, vp);
    862  MOZ_ASSERT(args.length() == 3);
    863  MOZ_ASSERT(!args.isConstructing());
    864 
    865  // Step 4.a.
    866  Rooted<JSAtom*> sourceAtom(cx, AtomizeString(cx, args[0].toString()));
    867  if (!sourceAtom) {
    868    return false;
    869  }
    870 
    871  // Step 4.c.
    872  uint32_t rawFlags = args[1].toInt32();
    873  JS::RegExpFlags flags =
    874      AssertedCast<uint8_t>(rawFlags & RegExpFlag::AllFlags);
    875 
    876  // Self-hosted code can't check prefs efficiently. In some cases it will
    877  // call this with the flag set even when the pref is disabled, in which
    878  // case we should ignore it.
    879  // TODO(bug 2009034): Clean this up when we ship the proposal.
    880  bool legacy = args[2].toBoolean() && JS::Prefs::experimental_legacy_regexp();
    881 
    882  // Step 7.
    883  RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
    884  if (!regexp) {
    885    return false;
    886  }
    887 
    888  // Step 8.
    889  regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
    890  regexp->setLegacyFeaturesEnabled(legacy);
    891  args.rval().setObject(*regexp);
    892  return true;
    893 }
    894 
    895 // This is a specialized implementation of "UnwrapAndTypeCheckThis" for RegExp
    896 // getters that need to return a special value for same-realm
    897 // %RegExp.prototype%.
    898 template <typename Fn>
    899 static bool RegExpGetter(JSContext* cx, CallArgs& args, const char* methodName,
    900                         Fn&& fn,
    901                         HandleValue fallbackValue = UndefinedHandleValue) {
    902  JSObject* obj = nullptr;
    903  if (args.thisv().isObject()) {
    904    obj = &args.thisv().toObject();
    905    if (IsWrapper(obj)) {
    906      obj = CheckedUnwrapStatic(obj);
    907      if (!obj) {
    908        ReportAccessDenied(cx);
    909        return false;
    910      }
    911    }
    912  }
    913 
    914  if (obj) {
    915    // Step 4ff
    916    if (obj->is<RegExpObject>()) {
    917      return fn(&obj->as<RegExpObject>());
    918    }
    919 
    920    // Step 3.a. "If SameValue(R, %RegExp.prototype%) is true, return
    921    // undefined."
    922    // Or `return "(?:)"` for get RegExp.prototype.source.
    923    if (obj == cx->global()->maybeGetRegExpPrototype()) {
    924      args.rval().set(fallbackValue);
    925      return true;
    926    }
    927 
    928    // fall-through
    929  }
    930 
    931  // Step 2. and Step 3.b.
    932  JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr,
    933                             JSMSG_INCOMPATIBLE_REGEXP_GETTER, methodName,
    934                             InformalValueTypeName(args.thisv()));
    935  return false;
    936 }
    937 
    938 bool js::regexp_hasIndices(JSContext* cx, unsigned argc, JS::Value* vp) {
    939  CallArgs args = CallArgsFromVp(argc, vp);
    940  return RegExpGetter(cx, args, "hasIndices", [args](RegExpObject* unwrapped) {
    941    args.rval().setBoolean(unwrapped->hasIndices());
    942    return true;
    943  });
    944 }
    945 
    946 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
    947 // 21.2.5.5 get RegExp.prototype.global
    948 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
    949  CallArgs args = CallArgsFromVp(argc, vp);
    950  return RegExpGetter(cx, args, "global", [args](RegExpObject* unwrapped) {
    951    args.rval().setBoolean(unwrapped->global());
    952    return true;
    953  });
    954 }
    955 
    956 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
    957 // 21.2.5.6 get RegExp.prototype.ignoreCase
    958 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
    959  CallArgs args = CallArgsFromVp(argc, vp);
    960  return RegExpGetter(cx, args, "ignoreCase", [args](RegExpObject* unwrapped) {
    961    args.rval().setBoolean(unwrapped->ignoreCase());
    962    return true;
    963  });
    964 }
    965 
    966 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
    967 // 21.2.5.9 get RegExp.prototype.multiline
    968 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
    969  CallArgs args = CallArgsFromVp(argc, vp);
    970  return RegExpGetter(cx, args, "multiline", [args](RegExpObject* unwrapped) {
    971    args.rval().setBoolean(unwrapped->multiline());
    972    return true;
    973  });
    974 }
    975 
    976 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
    977 // 21.2.5.12 get RegExp.prototype.source
    978 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
    979  CallArgs args = CallArgsFromVp(argc, vp);
    980  // Step 3.a. Return "(?:)" for %RegExp.prototype%.
    981  RootedValue fallback(cx, StringValue(cx->names().emptyRegExp_));
    982  return RegExpGetter(
    983      cx, args, "source",
    984      [cx, args](RegExpObject* unwrapped) {
    985        Rooted<JSAtom*> src(cx, unwrapped->getSource());
    986        MOZ_ASSERT(src);
    987        // Mark potentially cross-zone JSAtom.
    988        if (cx->zone() != unwrapped->zone()) {
    989          cx->markAtom(src);
    990        }
    991 
    992        // Step 7.
    993        JSString* escaped = EscapeRegExpPattern(cx, src);
    994        if (!escaped) {
    995          return false;
    996        }
    997 
    998        args.rval().setString(escaped);
    999        return true;
   1000      },
   1001      fallback);
   1002 }
   1003 
   1004 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
   1005 // 21.2.5.3 get RegExp.prototype.dotAll
   1006 bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) {
   1007  CallArgs args = CallArgsFromVp(argc, vp);
   1008  return RegExpGetter(cx, args, "dotAll", [args](RegExpObject* unwrapped) {
   1009    args.rval().setBoolean(unwrapped->dotAll());
   1010    return true;
   1011  });
   1012 }
   1013 
   1014 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
   1015 // 21.2.5.14 get RegExp.prototype.sticky
   1016 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
   1017  CallArgs args = CallArgsFromVp(argc, vp);
   1018  return RegExpGetter(cx, args, "sticky", [args](RegExpObject* unwrapped) {
   1019    args.rval().setBoolean(unwrapped->sticky());
   1020    return true;
   1021  });
   1022 }
   1023 
   1024 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
   1025 // 21.2.5.17 get RegExp.prototype.unicode
   1026 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
   1027  CallArgs args = CallArgsFromVp(argc, vp);
   1028  return RegExpGetter(cx, args, "unicode", [args](RegExpObject* unwrapped) {
   1029    args.rval().setBoolean(unwrapped->unicode());
   1030    return true;
   1031  });
   1032 }
   1033 
   1034 // https://arai-a.github.io/ecma262-compare/?pr=2418&id=sec-get-regexp.prototype.unicodesets
   1035 // 21.2.6.19 get RegExp.prototype.unicodeSets
   1036 bool js::regexp_unicodeSets(JSContext* cx, unsigned argc, JS::Value* vp) {
   1037  CallArgs args = CallArgsFromVp(argc, vp);
   1038  return RegExpGetter(cx, args, "unicodeSets", [args](RegExpObject* unwrapped) {
   1039    args.rval().setBoolean(unwrapped->unicodeSets());
   1040    return true;
   1041  });
   1042 }
   1043 
   // Accessor properties listed for RegExp objects. "flags" is backed by the
   // self-hosted $RegExpFlagsGetter; every other entry is backed by a native
   // getter defined in this file. All flag getters carry an inlinable-native
   // tag, while "source" is registered as a plain getter. The table is
   // terminated by JS_PS_END.
   1044 const JSPropertySpec js::regexp_properties[] = {
   1045    JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0),
   1046    JS_INLINABLE_PSG("hasIndices", regexp_hasIndices, 0, RegExpHasIndices),
   1047    JS_INLINABLE_PSG("global", regexp_global, 0, RegExpGlobal),
   1048    JS_INLINABLE_PSG("ignoreCase", regexp_ignoreCase, 0, RegExpIgnoreCase),
   1049    JS_INLINABLE_PSG("multiline", regexp_multiline, 0, RegExpMultiline),
   1050    JS_INLINABLE_PSG("dotAll", regexp_dotAll, 0, RegExpDotAll),
   1051    JS_PSG("source", regexp_source, 0),
   1052    JS_INLINABLE_PSG("sticky", regexp_sticky, 0, RegExpSticky),
   1053    JS_INLINABLE_PSG("unicode", regexp_unicode, 0, RegExpUnicode),
   1054    JS_INLINABLE_PSG("unicodeSets", regexp_unicodeSets, 0, RegExpUnicodeSets),
   1055    JS_PS_END,
   1056 };
   1057 
   // Methods listed for RegExp objects. Only "compile" is a native function
   // (regexp_compile); every other entry names a self-hosted implementation.
   // "toSource" and "toString" share the same self-hosted function. The
   // JS_SELF_HOSTED_SYM_FN entries are keyed by a well-known symbol (match,
   // matchAll, replace, search, split) rather than by a string name. The third
   // argument of each entry is the declared argument count. The table is
   // terminated by JS_FS_END.
   1058 const JSFunctionSpec js::regexp_methods[] = {
   1059    JS_SELF_HOSTED_FN("toSource", "$RegExpToString", 0, 0),
   1060    JS_SELF_HOSTED_FN("toString", "$RegExpToString", 0, 0),
   1061    JS_FN("compile", regexp_compile, 2, 0),
   1062    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
   1063    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
   1064    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
   1065    JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
   1066    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
   1067    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
   1068    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
   1069    JS_FS_END,
   1070 };
   1071 
   1072 static constexpr JS::Latin1Char SHOULD_HEX_ESCAPE = JSString::MAX_LATIN1_CHAR;
   1073 
   1074 /**
   1075 * Ascii escape map.
   1076 *
   1077 * 1. If a character is mapped to zero (0x00), then no escape sequence is used.
   1078 * 2. Else,
   1079 *   a. If a character is mapped to SHOULD_HEX_ESCAPE, then hex-escape.
   1080 *   b. Else, escape with `\` followed by the mapped value.
   1081 */
   1082 static constexpr auto AsciiRegExpEscapeMap() {
   1083  std::array<JS::Latin1Char, 128> result = {};
   1084 
   1085  // SyntaxCharacter or U+002F (SOLIDUS)
   1086  result['^'] = '^';
   1087  result['$'] = '$';
   1088  result['\\'] = '\\';
   1089  result['.'] = '.';
   1090  result['*'] = '*';
   1091  result['+'] = '+';
   1092  result['?'] = '?';
   1093  result['('] = '(';
   1094  result[')'] = ')';
   1095  result['['] = '[';
   1096  result[']'] = ']';
   1097  result['{'] = '{';
   1098  result['}'] = '}';
   1099  result['|'] = '|';
   1100  result['/'] = '/';
   1101 
   1102  // ControlEscape Code Point Values
   1103  result['\t'] = 't';
   1104  result['\n'] = 'n';
   1105  result['\v'] = 'v';
   1106  result['\f'] = 'f';
   1107  result['\r'] = 'r';
   1108 
   1109  // Other punctuators ",-=<>#&!%:;@~'`" or 0x0022 (QUOTATION MARK)
   1110  result[','] = SHOULD_HEX_ESCAPE;
   1111  result['-'] = SHOULD_HEX_ESCAPE;
   1112  result['='] = SHOULD_HEX_ESCAPE;
   1113  result['<'] = SHOULD_HEX_ESCAPE;
   1114  result['>'] = SHOULD_HEX_ESCAPE;
   1115  result['#'] = SHOULD_HEX_ESCAPE;
   1116  result['&'] = SHOULD_HEX_ESCAPE;
   1117  result['!'] = SHOULD_HEX_ESCAPE;
   1118  result['%'] = SHOULD_HEX_ESCAPE;
   1119  result[':'] = SHOULD_HEX_ESCAPE;
   1120  result[';'] = SHOULD_HEX_ESCAPE;
   1121  result['@'] = SHOULD_HEX_ESCAPE;
   1122  result['~'] = SHOULD_HEX_ESCAPE;
   1123  result['\''] = SHOULD_HEX_ESCAPE;
   1124  result['`'] = SHOULD_HEX_ESCAPE;
   1125  result['"'] = SHOULD_HEX_ESCAPE;
   1126 
   1127  // WhiteSpace or LineTerminator
   1128  result[' '] = SHOULD_HEX_ESCAPE;
   1129 
   1130  return result;
   1131 }
   1132 
   1133 /**
   1134 * EncodeForRegExpEscape ( c )
   1135 *
   1136 * https://tc39.es/proposal-regex-escaping/#sec-encodeforregexpescape
   1137 */
   1138 template <typename CharT>
   1139 [[nodiscard]] static bool EncodeForRegExpEscape(
   1140    mozilla::Span<const CharT> chars, JSStringBuilder& sb) {
   1141  MOZ_ASSERT(sb.empty());
   1142 
   1143  const size_t length = chars.size();
   1144  if (length == 0) {
   1145    return true;
   1146  }
   1147 
   1148  static constexpr auto asciiEscapeMap = AsciiRegExpEscapeMap();
   1149 
   1150  // Number of characters added when escaping.
   1151  static constexpr size_t EscapeAddLength = 2 - 1;
   1152  static constexpr size_t HexEscapeAddLength = 4 - 1;
   1153  static constexpr size_t UnicodeEscapeAddLength = 6 - 1;
   1154 
   1155  // Initial scan to determine if escape sequences are needed and to compute
   1156  // the output length.
   1157  size_t outLength = length;
   1158 
   1159  // Leading Ascii alpha-numeric character is hex-escaped.
   1160  size_t scanStart = 0;
   1161  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
   1162    outLength += HexEscapeAddLength;
   1163    scanStart = 1;
   1164  }
   1165 
   1166  for (size_t i = scanStart; i < length; i++) {
   1167    CharT ch = chars[i];
   1168 
   1169    JS::Latin1Char escape = 0;
   1170    if (mozilla::IsAscii(ch)) {
   1171      escape = asciiEscapeMap[ch];
   1172    } else {
   1173      // Surrogate pair.
   1174      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
   1175          unicode::IsTrailSurrogate(chars[i + 1])) {
   1176        i += 1;
   1177        continue;
   1178      }
   1179 
   1180      // WhiteSpace or LineTerminator or unmatched surrogate.
   1181      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
   1182        escape = SHOULD_HEX_ESCAPE;
   1183      }
   1184    }
   1185    if (!escape) {
   1186      continue;
   1187    }
   1188 
   1189    if (mozilla::IsAscii(escape)) {
   1190      outLength += EscapeAddLength;
   1191    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
   1192      outLength += HexEscapeAddLength;
   1193    } else {
   1194      outLength += UnicodeEscapeAddLength;
   1195    }
   1196  }
   1197 
   1198  // Return if no escape sequences are needed.
   1199  if (outLength == length) {
   1200    return true;
   1201  }
   1202  MOZ_ASSERT(outLength > length);
   1203 
   1204  // Inflating is fallible, so we have to convert to two-byte upfront.
   1205  if constexpr (std::is_same_v<CharT, char16_t>) {
   1206    if (!sb.ensureTwoByteChars()) {
   1207      return false;
   1208    }
   1209  }
   1210 
   1211  // Allocate memory for the output using the final length.
   1212  if (!sb.reserve(outLength)) {
   1213    return false;
   1214  }
   1215 
   1216  // NB: Lower case hex digits.
   1217  static constexpr char HexDigits[] = "0123456789abcdef";
   1218  static_assert(std::char_traits<char>::length(HexDigits) == 16);
   1219 
   1220  // Append |ch| as an escaped character.
   1221  auto appendEscape = [&](JS::Latin1Char ch) {
   1222    MOZ_ASSERT(mozilla::IsAscii(ch));
   1223 
   1224    sb.infallibleAppend('\\');
   1225    sb.infallibleAppend(ch);
   1226  };
   1227 
   1228  // Append |ch| as a hex-escape sequence.
   1229  auto appendHexEscape = [&](CharT ch) {
   1230    MOZ_ASSERT(ch <= JSString::MAX_LATIN1_CHAR);
   1231 
   1232    sb.infallibleAppend('\\');
   1233    sb.infallibleAppend('x');
   1234    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
   1235    sb.infallibleAppend(HexDigits[ch & 0xf]);
   1236  };
   1237 
   1238  // Append |ch| as a Unicode-escape sequence.
   1239  auto appendUnicodeEscape = [&](char16_t ch) {
   1240    MOZ_ASSERT(ch > JSString::MAX_LATIN1_CHAR);
   1241 
   1242    sb.infallibleAppend('\\');
   1243    sb.infallibleAppend('u');
   1244    sb.infallibleAppend(HexDigits[(ch >> 12) & 0xf]);
   1245    sb.infallibleAppend(HexDigits[(ch >> 8) & 0xf]);
   1246    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
   1247    sb.infallibleAppend(HexDigits[ch & 0xf]);
   1248  };
   1249 
   1250  // Index after the last character which produced an escape sequence.
   1251  size_t startUnescaped = 0;
   1252 
   1253  // Append unescaped characters from |startUnescaped| (inclusive) to |end|
   1254  // (exclusive).
   1255  auto appendUnescaped = [&](size_t end) {
   1256    MOZ_ASSERT(startUnescaped <= end && end <= length);
   1257 
   1258    if (startUnescaped < end) {
   1259      auto unescaped = chars.FromTo(startUnescaped, end);
   1260      sb.infallibleAppend(unescaped.data(), unescaped.size());
   1261    }
   1262    startUnescaped = end + 1;
   1263  };
   1264 
   1265  // Leading Ascii alpha-numeric character is hex-escaped.
   1266  size_t start = 0;
   1267  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
   1268    appendHexEscape(chars[0]);
   1269 
   1270    start = 1;
   1271    startUnescaped = 1;
   1272  }
   1273 
   1274  for (size_t i = start; i < length; i++) {
   1275    CharT ch = chars[i];
   1276 
   1277    JS::Latin1Char escape = 0;
   1278    if (mozilla::IsAscii(ch)) {
   1279      escape = asciiEscapeMap[ch];
   1280    } else {
   1281      // Surrogate pair.
   1282      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
   1283          unicode::IsTrailSurrogate(chars[i + 1])) {
   1284        i += 1;
   1285        continue;
   1286      }
   1287 
   1288      // WhiteSpace or LineTerminator or unmatched surrogate.
   1289      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
   1290        escape = SHOULD_HEX_ESCAPE;
   1291      }
   1292    }
   1293    if (!escape) {
   1294      continue;
   1295    }
   1296 
   1297    appendUnescaped(i);
   1298 
   1299    if (mozilla::IsAscii(escape)) {
   1300      appendEscape(escape);
   1301    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
   1302      appendHexEscape(ch);
   1303    } else {
   1304      appendUnicodeEscape(ch);
   1305    }
   1306  }
   1307 
   1308  if (startUnescaped) {
   1309    appendUnescaped(length);
   1310  }
   1311 
   1312  MOZ_ASSERT(sb.length() == outLength, "all characters were written");
   1313  return true;
   1314 }
   1315 
   1316 [[nodiscard]] static bool EncodeForRegExpEscape(JSLinearString* string,
   1317                                                JSStringBuilder& sb) {
   1318  JS::AutoCheckCannotGC nogc;
   1319  if (string->hasLatin1Chars()) {
   1320    auto chars = mozilla::Span(string->latin1Range(nogc));
   1321    return EncodeForRegExpEscape(chars, sb);
   1322  }
   1323  auto chars = mozilla::Span(string->twoByteRange(nogc));
   1324  return EncodeForRegExpEscape(chars, sb);
   1325 }
   1326 
   1327 /**
   1328 * RegExp.escape ( S )
   1329 *
   1330 * https://tc39.es/proposal-regex-escaping/
   1331 */
   1332 static bool regexp_escape(JSContext* cx, unsigned argc, Value* vp) {
   1333  CallArgs args = CallArgsFromVp(argc, vp);
   1334 
   1335  // Step 1.
   1336  if (!args.get(0).isString()) {
   1337    return ReportValueError(cx, JSMSG_UNEXPECTED_TYPE, JSDVG_SEARCH_STACK,
   1338                            args.get(0), nullptr, "not a string");
   1339  }
   1340 
   1341  Rooted<JSLinearString*> string(cx, args[0].toString()->ensureLinear(cx));
   1342  if (!string) {
   1343    return false;
   1344  }
   1345 
   1346  // Step 2-5.
   1347  JSStringBuilder sb(cx);
   1348  if (!EncodeForRegExpEscape(string, sb)) {
   1349    return false;
   1350  }
   1351 
   1352  // Return the input string if no escape sequences were added.
   1353  if (sb.empty()) {
   1354    args.rval().setString(string);
   1355    return true;
   1356  }
   1357 
   1358  auto* result = sb.finishString();
   1359  if (!result) {
   1360    return false;
   1361  }
   1362 
   1363  args.rval().setString(result);
   1364  return true;
   1365 }
   1366 
   // Body fragment for the RegExp.$1 .. RegExp.$9 getters. It expects |cx|,
   // |res| and |args| to be in scope at the expansion site. It asks |res| for
   // paren group |parenNum|, replaces an undefined result with the runtime's
   // empty string, and returns true; it returns false if createParen fails.
   // The expansion ends without a semicolon, which the using macro supplies.
   1367 #define STATIC_PAREN_GETTER_CODE(parenNum)                        \
   1368  if (!res->createParen(cx, parenNum, args.rval())) return false; \
   1369  if (args.rval().isUndefined())                                  \
   1370    args.rval().setString(cx->runtime()->emptyString);            \
   1371  return true
   1372 
   1373 /*
   1374 * RegExp static properties.
   1375 *
   1376 * RegExp class static properties and their Perl counterparts:
   1377 *
   1378 *  RegExp.input                $_
   1379 *  RegExp.lastMatch            $&
   1380 *  RegExp.lastParen            $+
   1381 *  RegExp.leftContext          $`
   1382 *  RegExp.rightContext         $'
   1383 */
   1384 
   1385 static bool checkRegexpLegacyFeatures(JSContext* cx, const CallArgs& args,
   1386                                      const char* name) {
   1387  if (JS::Prefs::experimental_legacy_regexp()) {
   1388    /* Step 1. Assert C is an object that has an internal slot named
   1389     * internalSlotName.*/
   1390    JSObject* regexpCtor =
   1391        GlobalObject::getOrCreateRegExpConstructor(cx, cx->global());
   1392    if (!regexpCtor) return false;
   1393 
   1394    /* Step 2. If SameValue(C, thisValue) is false, throw TypeError */
   1395    bool same = false;
   1396    if (!args.thisv().isObject() ||
   1397        !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) ||
   1398        !same) {
   1399      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   1400                                JSMSG_INCOMPATIBLE_RECEIVER, name,
   1401                                InformalValueTypeName(args.thisv()));
   1402      return false;
   1403    }
   1404 
   1405    /* Step 4. If val is empty, throw a TypeError exception */
   1406    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
   1407    if (!res) return false;
   1408    if (res->isInvalidated()) {
   1409      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   1410                                JSMSG_REGEXP_STATIC_EMPTY, name,
   1411                                InformalValueTypeName(args.thisv()));
   1412      return false;
   1413    }
   1414  }
   1415  return true;
   1416 }
   1417 
   // Defines a native getter function called |name| for one of the RegExp static
   // properties. The generated function fetches the current global's
   // RegExpStatics into |res|, runs checkRegexpLegacyFeatures (passing the
   // stringified function name for error messages), and then executes |code|,
   // which is responsible for setting the return value and returning.
   1418 #define DEFINE_STATIC_GETTER(name, code)                                   \
   1419  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
   1420    CallArgs args = CallArgsFromVp(argc, vp);                              \
   1421    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
   1422    if (!res) return false;                                                \
   1423    if (!checkRegexpLegacyFeatures(cx, args, #name)) return false;         \
   1424    code;                                                                  \
   1425  }
   1426 
   // Getters whose value is produced directly by a RegExpStatics method.
   1427 DEFINE_STATIC_GETTER(static_input_getter,
   1428                     return res->createPendingInput(cx, args.rval()))
   1429 DEFINE_STATIC_GETTER(static_lastMatch_getter,
   1430                     return res->createLastMatch(cx, args.rval()))
   1431 DEFINE_STATIC_GETTER(static_lastParen_getter,
   1432                     return res->createLastParen(cx, args.rval()))
   1433 DEFINE_STATIC_GETTER(static_leftContext_getter,
   1434                     return res->createLeftContext(cx, args.rval()))
   1435 DEFINE_STATIC_GETTER(static_rightContext_getter,
   1436                     return res->createRightContext(cx, args.rval()))
   1437 
   // Getters for paren groups 1 through 9; see STATIC_PAREN_GETTER_CODE.
   1438 DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
   1439 DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
   1440 DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
   1441 DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
   1442 DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
   1443 DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
   1444 DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
   1445 DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
   1446 DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
   1447 
   // Setter counterpart of DEFINE_STATIC_GETTER: defines a native function
   // |name| that fetches the global's RegExpStatics into |res|, runs
   // checkRegexpLegacyFeatures, executes |code| and then returns true.
   // NOTE(review): no use of this macro is visible in this part of the file
   // (static_input_setter is written out by hand) - confirm whether it is still
   // needed.
   1448 #define DEFINE_STATIC_SETTER(name, code)                                   \
   1449  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
   1450    CallArgs args = CallArgsFromVp(argc, vp);                              \
   1451    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
   1452    if (!res) return false;                                                \
   1453    if (!checkRegexpLegacyFeatures(cx, args, #name)) return false;         \
   1454    code;                                                                  \
   1455    return true;                                                           \
   1456  }
   1457 
   1458 static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
   1459  CallArgs args = CallArgsFromVp(argc, vp);
   1460  if (JS::Prefs::experimental_legacy_regexp()) {
   1461    // Step 1. Assert C is an object that has an internal slot named
   1462    // internalSlotName.
   1463    JSObject* regexpCtor =
   1464        GlobalObject::getOrCreateRegExpConstructor(cx, cx->global());
   1465    if (!regexpCtor) {
   1466      return false;
   1467    }
   1468 
   1469    // Step 2. If SameValue(C, thisValue) is false, throw a TypeError exception.
   1470    bool same = false;
   1471    if (!args.thisv().isObject() ||
   1472        !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) ||
   1473        !same) {
   1474      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   1475                                JSMSG_INCOMPATIBLE_RECEIVER,
   1476                                InformalValueTypeName(args.thisv()));
   1477      return false;
   1478    }
   1479  }
   1480 
   1481  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
   1482  if (!res) {
   1483    return false;
   1484  }
   1485 
   1486  // Step 3. Let strVal be ? ToString(val).
   1487  RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
   1488  if (!str) {
   1489    return false;
   1490  }
   1491 
   1492  // Step 4. Set the value of the internal slot of C named internalSlotName to
   1493  // strVal.
   1494  res->setPendingInput(str);
   1495  args.rval().setString(str);
   1496  return true;
   1497 }
   1498 
   // Static accessor properties listed for the RegExp constructor: the legacy
   // match-state properties (long names, $1..$9 and the Perl-style aliases) plus
   // the self-hosted Symbol.species getter. Only "input" and its alias "$_" have
   // a setter.
   //
   // The two build variants list the same accessors and differ only in the
   // attribute flags: with NIGHTLY_BUILD every entry uses 0, otherwise the long
   // names and $1..$9 are JSPROP_PERMANENT | JSPROP_ENUMERATE and the Perl-style
   // aliases are JSPROP_PERMANENT only.
   1499 #ifdef NIGHTLY_BUILD
   1500 const JSPropertySpec js::regexp_static_props[] = {
   1501    JS_PSGS("input", static_input_getter, static_input_setter, 0),
   1502    JS_PSG("lastMatch", static_lastMatch_getter, 0),
   1503    JS_PSG("lastParen", static_lastParen_getter, 0),
   1504    JS_PSG("leftContext", static_leftContext_getter, 0),
   1505    JS_PSG("rightContext", static_rightContext_getter, 0),
   1506    JS_PSG("$1", static_paren1_getter, 0),
   1507    JS_PSG("$2", static_paren2_getter, 0),
   1508    JS_PSG("$3", static_paren3_getter, 0),
   1509    JS_PSG("$4", static_paren4_getter, 0),
   1510    JS_PSG("$5", static_paren5_getter, 0),
   1511    JS_PSG("$6", static_paren6_getter, 0),
   1512    JS_PSG("$7", static_paren7_getter, 0),
   1513    JS_PSG("$8", static_paren8_getter, 0),
   1514    JS_PSG("$9", static_paren9_getter, 0),
   1515    JS_PSGS("$_", static_input_getter, static_input_setter, 0),
   1516    JS_PSG("$&", static_lastMatch_getter, 0),
   1517    JS_PSG("$+", static_lastParen_getter, 0),
   1518    JS_PSG("$`", static_leftContext_getter, 0),
   1519    JS_PSG("$'", static_rightContext_getter, 0),
   1520    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
   1521    JS_PS_END,
   1522 };
   1523 #else
   1524 const JSPropertySpec js::regexp_static_props[] = {
   1525    JS_PSGS("input", static_input_getter, static_input_setter,
   1526            JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1527    JS_PSG("lastMatch", static_lastMatch_getter,
   1528           JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1529    JS_PSG("lastParen", static_lastParen_getter,
   1530           JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1531    JS_PSG("leftContext", static_leftContext_getter,
   1532           JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1533    JS_PSG("rightContext", static_rightContext_getter,
   1534           JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1535    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1536    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1537    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1538    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1539    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1540    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1541    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1542    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1543    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
   1544    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
   1545    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
   1546    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
   1547    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
   1548    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
   1549    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
   1550    JS_PS_END,
   1551 };
   1552 #endif
   1553 
   // Static methods listed for the RegExp constructor. The only entry is
   // "escape", implemented natively by regexp_escape with a declared argument
   // count of 1. The table is terminated by JS_FS_END.
   1554 const JSFunctionSpec js::regexp_static_methods[] = {
   1555    JS_FN("escape", regexp_escape, 1, 0),
   1556    JS_FS_END,
   1557 };
   1558 
   1559 /*
   1560 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
   1561 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
   1562 */
   1563 static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
   1564                                     HandleString string, int32_t lastIndex,
   1565                                     VectorMatchPairs* matches) {
   1566  /*
   1567   * WARNING: Despite the presence of spec step comment numbers, this
   1568   *          algorithm isn't consistent with any ES6 version, draft or
   1569   *          otherwise.  YOU HAVE BEEN WARNED.
   1570   */
   1571 
   1572  /* Steps 1-2 performed by the caller. */
   1573  Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
   1574 
   1575  RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
   1576  if (!re) {
   1577    return RegExpRunStatus::Error;
   1578  }
   1579 
   1580  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
   1581  if (!res) {
   1582    return RegExpRunStatus::Error;
   1583  }
   1584 
   1585  Rooted<JSLinearString*> input(cx, string->ensureLinear(cx));
   1586  if (!input) {
   1587    return RegExpRunStatus::Error;
   1588  }
   1589 
   1590  /* Handled by caller */
   1591  MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
   1592 
   1593  /* Steps 4-8 performed by the caller. */
   1594 
   1595  /* Steps 3, 10-14, except 12.a.i, 12.c.i.1. */
   1596  RegExpRunStatus status =
   1597      ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, reobj);
   1598  if (status == RegExpRunStatus::Error) {
   1599    return RegExpRunStatus::Error;
   1600  }
   1601 
   1602  /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
   1603  return status;
   1604 }
   1605 
   1606 /*
   1607 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
   1608 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
   1609 */
   1610 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
   1611                              HandleString string, int32_t lastIndex,
   1612                              MutableHandleValue rval) {
   1613  /* Execute regular expression and gather matches. */
   1614  VectorMatchPairs matches;
   1615 
   1616  /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
   1617  RegExpRunStatus status =
   1618      ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
   1619  if (status == RegExpRunStatus::Error) {
   1620    return false;
   1621  }
   1622 
   1623  /* Steps 12.a, 12.c. */
   1624  if (status == RegExpRunStatus::Success_NotFound) {
   1625    rval.setNull();
   1626    return true;
   1627  }
   1628 
   1629  /* Steps 16-25 */
   1630  RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
   1631  return CreateRegExpMatchResult(cx, shared, string, matches, rval);
   1632 }
   1633 
   1634 /*
   1635 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
   1636 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
   1637 */
   1638 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
   1639  CallArgs args = CallArgsFromVp(argc, vp);
   1640  MOZ_ASSERT(args.length() == 3);
   1641  MOZ_ASSERT(IsRegExpObject(args[0]));
   1642  MOZ_ASSERT(args[1].isString());
   1643  MOZ_ASSERT(args[2].isNumber());
   1644 
   1645  RootedObject regexp(cx, &args[0].toObject());
   1646  RootedString string(cx, args[1].toString());
   1647 
   1648  int32_t lastIndex;
   1649  MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
   1650 
   1651  /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
   1652  return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
   1653 }
   1654 
   1655 /*
   1656 * Separate interface for use by the JITs.
   1657 * This code cannot re-enter JIT code.
   1658 */
   1659 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
   1660                          HandleString input, int32_t lastIndex,
   1661                          MatchPairs* maybeMatches, MutableHandleValue output) {
   1662  MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
   1663 
   1664  // RegExp execution was successful only if the pairs have actually been
   1665  // filled in. Note that IC code always passes a nullptr maybeMatches.
   1666  if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
   1667    RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
   1668    return CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output);
   1669  }
   1670  return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
   1671 }
   1672 
   1673 /*
   1674 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
   1675 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
   1676 * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
   1677 * changes to this code need to get reflected in there too.
   1678 */
   1679 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
   1680                               HandleString string, int32_t lastIndex,
   1681                               int32_t* result) {
   1682  /* Execute regular expression and gather matches. */
   1683  VectorMatchPairs matches;
   1684 
   1685 #ifdef DEBUG
   1686  // Ensure we assert if RegExpSearcherLastLimit is called when there's no
   1687  // match.
   1688  cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
   1689 #endif
   1690 
   1691  /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
   1692  RegExpRunStatus status =
   1693      ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
   1694  if (status == RegExpRunStatus::Error) {
   1695    return false;
   1696  }
   1697 
   1698  /* Steps 12.a, 12.c. */
   1699  if (status == RegExpRunStatus::Success_NotFound) {
   1700    *result = -1;
   1701    return true;
   1702  }
   1703 
   1704  /* Steps 16-25 */
   1705  *result = CreateRegExpSearchResult(cx, matches);
   1706  return true;
   1707 }
   1708 
   1709 /*
   1710 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
   1711 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
   1712 */
   1713 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
   1714  CallArgs args = CallArgsFromVp(argc, vp);
   1715  MOZ_ASSERT(args.length() == 3);
   1716  MOZ_ASSERT(IsRegExpObject(args[0]));
   1717  MOZ_ASSERT(args[1].isString());
   1718  MOZ_ASSERT(args[2].isNumber());
   1719 
   1720  RootedObject regexp(cx, &args[0].toObject());
   1721  RootedString string(cx, args[1].toString());
   1722 
   1723  int32_t lastIndex;
   1724  MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
   1725 
   1726  /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
   1727  int32_t result = 0;
   1728  if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) {
   1729    return false;
   1730  }
   1731 
   1732  args.rval().setInt32(result);
   1733  return true;
   1734 }
   1735 
   1736 /*
   1737 * Separate interface for use by the JITs.
   1738 * This code cannot re-enter JIT code.
   1739 */
   1740 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
   1741                           HandleString input, int32_t lastIndex,
   1742                           MatchPairs* maybeMatches, int32_t* result) {
   1743  MOZ_ASSERT(lastIndex >= 0);
   1744 
   1745  // RegExp execution was successful only if the pairs have actually been
   1746  // filled in. Note that IC code always passes a nullptr maybeMatches.
   1747  if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
   1748    *result = CreateRegExpSearchResult(cx, *maybeMatches);
   1749    return true;
   1750  }
   1751  return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
   1752 }
   1753 
   1754 bool js::RegExpSearcherLastLimit(JSContext* cx, unsigned argc, Value* vp) {
   1755  CallArgs args = CallArgsFromVp(argc, vp);
   1756  MOZ_ASSERT(args.length() == 1);
   1757  MOZ_ASSERT(args[0].isString());
   1758 
   1759  // Assert the limit is not the sentinel value and is valid for this string.
   1760  MOZ_ASSERT(cx->regExpSearcherLastLimit != RegExpSearcherLastLimitSentinel);
   1761  MOZ_ASSERT(cx->regExpSearcherLastLimit <= args[0].toString()->length());
   1762 
   1763  args.rval().setInt32(cx->regExpSearcherLastLimit);
   1764 
   1765 #ifdef DEBUG
   1766  // Ensure we assert if this function is called again without a new call to
   1767  // RegExpSearcher.
   1768  cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
   1769 #endif
   1770  return true;
   1771 }
   1772 
   1773 template <bool CalledFromJit>
   1774 static bool RegExpBuiltinExecMatchRaw(JSContext* cx,
   1775                                      Handle<RegExpObject*> regexp,
   1776                                      HandleString input, int32_t lastIndex,
   1777                                      MatchPairs* maybeMatches,
   1778                                      MutableHandleValue output) {
   1779  MOZ_ASSERT(lastIndex >= 0);
   1780  MOZ_ASSERT(size_t(lastIndex) <= input->length());
   1781  MOZ_ASSERT_IF(!CalledFromJit, !maybeMatches);
   1782 
   1783  // RegExp execution was successful only if the pairs have actually been
   1784  // filled in. Note that IC code always passes a nullptr maybeMatches.
   1785  int32_t lastIndexNew = 0;
   1786  if (CalledFromJit && maybeMatches &&
   1787      maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
   1788    RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
   1789    if (!CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output)) {
   1790      return false;
   1791    }
   1792    lastIndexNew = (*maybeMatches)[0].limit;
   1793  } else {
   1794    VectorMatchPairs matches;
   1795    RegExpRunStatus status =
   1796        ExecuteRegExp(cx, regexp, input, lastIndex, &matches);
   1797    if (status == RegExpRunStatus::Error) {
   1798      return false;
   1799    }
   1800    if (status == RegExpRunStatus::Success_NotFound) {
   1801      output.setNull();
   1802      lastIndexNew = 0;
   1803    } else {
   1804      RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
   1805      if (!CreateRegExpMatchResult(cx, shared, input, matches, output)) {
   1806        return false;
   1807      }
   1808      lastIndexNew = matches[0].limit;
   1809    }
   1810  }
   1811 
   1812  RegExpFlags flags = regexp->getFlags();
   1813  if (!flags.global() && !flags.sticky()) {
   1814    return true;
   1815  }
   1816 
   1817  return SetLastIndex<CalledFromJit>(cx, regexp, lastIndexNew);
   1818 }
   1819 
   1820 bool js::RegExpBuiltinExecMatchFromJit(JSContext* cx,
   1821                                       Handle<RegExpObject*> regexp,
   1822                                       HandleString input,
   1823                                       MatchPairs* maybeMatches,
   1824                                       MutableHandleValue output) {
   1825  int32_t lastIndex = 0;
   1826  if (regexp->isGlobalOrSticky()) {
   1827    lastIndex = regexp->getLastIndex().toInt32();
   1828    MOZ_ASSERT(lastIndex >= 0);
   1829    if (size_t(lastIndex) > input->length()) {
   1830      output.setNull();
   1831      return SetLastIndex<true>(cx, regexp, 0);
   1832    }
   1833  }
   1834  return RegExpBuiltinExecMatchRaw<true>(cx, regexp, input, lastIndex,
   1835                                         maybeMatches, output);
   1836 }
   1837 
   1838 template <bool CalledFromJit>
   1839 static bool RegExpBuiltinExecTestRaw(JSContext* cx,
   1840                                     Handle<RegExpObject*> regexp,
   1841                                     HandleString input, int32_t lastIndex,
   1842                                     bool* result) {
   1843  MOZ_ASSERT(lastIndex >= 0);
   1844  MOZ_ASSERT(size_t(lastIndex) <= input->length());
   1845 
   1846  VectorMatchPairs matches;
   1847  RegExpRunStatus status =
   1848      ExecuteRegExp(cx, regexp, input, lastIndex, &matches);
   1849  if (status == RegExpRunStatus::Error) {
   1850    return false;
   1851  }
   1852 
   1853  *result = (status == RegExpRunStatus::Success);
   1854 
   1855  RegExpFlags flags = regexp->getFlags();
   1856  if (!flags.global() && !flags.sticky()) {
   1857    return true;
   1858  }
   1859 
   1860  int32_t lastIndexNew = *result ? matches[0].limit : 0;
   1861  return SetLastIndex<CalledFromJit>(cx, regexp, lastIndexNew);
   1862 }
   1863 
   1864 bool js::RegExpBuiltinExecTestFromJit(JSContext* cx,
   1865                                      Handle<RegExpObject*> regexp,
   1866                                      HandleString input, bool* result) {
   1867  int32_t lastIndex = 0;
   1868  if (regexp->isGlobalOrSticky()) {
   1869    lastIndex = regexp->getLastIndex().toInt32();
   1870    MOZ_ASSERT(lastIndex >= 0);
   1871    if (size_t(lastIndex) > input->length()) {
   1872      *result = false;
   1873      return SetLastIndex<true>(cx, regexp, 0);
   1874    }
   1875  }
   1876  return RegExpBuiltinExecTestRaw<true>(cx, regexp, input, lastIndex, result);
   1877 }
   1878 
   1879 using CapturesVector = GCVector<Value, 4>;  // Capture values; each entry is read as either undefined or a string (see GetParen).
   1880 
   1881 struct JSSubString {
   1882  JSLinearString* base = nullptr;
   1883  size_t offset = 0;
   1884  size_t length = 0;
   1885 
   1886  JSSubString() = default;
   1887 
   1888  void initEmpty(JSLinearString* base) {
   1889    this->base = base;
   1890    offset = length = 0;
   1891  }
   1892  void init(JSLinearString* base, size_t offset, size_t length) {
   1893    this->base = base;
   1894    this->offset = offset;
   1895    this->length = length;
   1896  }
   1897 };
   1898 
   1899 static void GetParen(JSLinearString* matched, const JS::Value& capture,
   1900                     JSSubString* out) {
   1901  if (capture.isUndefined()) {
   1902    out->initEmpty(matched);
   1903    return;
   1904  }
   1905  JSLinearString& captureLinear = capture.toString()->asLinear();
   1906  out->init(&captureLinear, 0, captureLinear.length());
   1907 }
   1908 
   1909 template <typename CharT>
   1910 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
   1911                            size_t position, size_t tailPos,
   1912                            Handle<CapturesVector> captures,
   1913                            Handle<CapturesVector> namedCaptures,
   1914                            JSLinearString* replacement,
   1915                            const CharT* replacementBegin,
   1916                            const CharT* currentDollar,
   1917                            const CharT* replacementEnd, JSSubString* out,
   1918                            size_t* skip, uint32_t* currentNamedCapture) {
   1919  MOZ_ASSERT(*currentDollar == '$');
   1920 
   1921  /* If there is only a dollar, bail now. */
   1922  if (currentDollar + 1 >= replacementEnd) {
   1923    return false;
   1924  }
   1925 
   1926  // ES 2021 Table 57: Replacement Text Symbol Substitutions
   1927  // https://tc39.es/ecma262/#table-replacement-text-symbol-substitutions
   1928  char16_t c = currentDollar[1];
   1929  if (IsAsciiDigit(c)) {
   1930    /* $n, $nn */
   1931    unsigned num = AsciiDigitToNumber(c);
   1932    if (num > captures.length()) {
   1933      // The result is implementation-defined. Do not substitute.
   1934      return false;
   1935    }
   1936 
   1937    const CharT* currentChar = currentDollar + 2;
   1938    if (currentChar < replacementEnd) {
   1939      c = *currentChar;
   1940      if (IsAsciiDigit(c)) {
   1941        unsigned tmpNum = 10 * num + AsciiDigitToNumber(c);
   1942        // If num > captures.length(), the result is implementation-defined.
   1943        // Consume next character only if num <= captures.length().
   1944        if (tmpNum <= captures.length()) {
   1945          currentChar++;
   1946          num = tmpNum;
   1947        }
   1948      }
   1949    }
   1950 
   1951    if (num == 0) {
   1952      // The result is implementation-defined. Do not substitute.
   1953      return false;
   1954    }
   1955 
   1956    *skip = currentChar - currentDollar;
   1957 
   1958    MOZ_ASSERT(num <= captures.length());
   1959 
   1960    GetParen(matched, captures[num - 1], out);
   1961    return true;
   1962  }
   1963 
   1964  // '$<': Named Captures
   1965  if (c == '<') {
   1966    // Step 1.
   1967    if (namedCaptures.length() == 0) {
   1968      return false;
   1969    }
   1970 
   1971    // Step 2.b
   1972    const CharT* nameStart = currentDollar + 2;
   1973    const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
   1974 
   1975    // Step 2.c
   1976    if (!nameEnd) {
   1977      return false;
   1978    }
   1979 
   1980    // Step 2.d
   1981    // We precompute named capture replacements in InitNamedCaptures.
   1982    // They are stored in the order in which we will need them, so here
   1983    // we can just take the next one in the list.
   1984    size_t nameLength = nameEnd - nameStart;
   1985    *skip = nameLength + 3;  // $<...>
   1986 
   1987    // Steps 2.d.iii-iv
   1988    GetParen(matched, namedCaptures[*currentNamedCapture], out);
   1989    *currentNamedCapture += 1;
   1990    return true;
   1991  }
   1992 
   1993  switch (c) {
   1994    default:
   1995      return false;
   1996    case '$':
   1997      out->init(replacement, currentDollar - replacementBegin, 1);
   1998      break;
   1999    case '&':
   2000      out->init(matched, 0, matched->length());
   2001      break;
   2002    case '`':
   2003      out->init(string, 0, position);
   2004      break;
   2005    case '\'':
   2006      if (tailPos >= string->length()) {
   2007        out->initEmpty(matched);
   2008      } else {
   2009        out->init(string, tailPos, string->length() - tailPos);
   2010      }
   2011      break;
   2012  }
   2013 
   2014  *skip = 2;
   2015  return true;
   2016 }
   2017 
   2018 template <typename CharT>
   2019 static bool FindReplaceLengthString(JSContext* cx,
   2020                                    Handle<JSLinearString*> matched,
   2021                                    Handle<JSLinearString*> string,
   2022                                    size_t position, size_t tailPos,
   2023                                    Handle<CapturesVector> captures,
   2024                                    Handle<CapturesVector> namedCaptures,
   2025                                    Handle<JSLinearString*> replacement,
   2026                                    size_t firstDollarIndex, size_t* sizep) {
   2027  CheckedInt<uint32_t> replen = replacement->length();
   2028 
   2029  JS::AutoCheckCannotGC nogc;
   2030  MOZ_ASSERT(firstDollarIndex < replacement->length());
   2031  const CharT* replacementBegin = replacement->chars<CharT>(nogc);
   2032  const CharT* currentDollar = replacementBegin + firstDollarIndex;
   2033  const CharT* replacementEnd = replacementBegin + replacement->length();
   2034  uint32_t currentNamedCapture = 0;
   2035  do {
   2036    JSSubString sub;
   2037    size_t skip;
   2038    if (InterpretDollar(matched, string, position, tailPos, captures,
   2039                        namedCaptures, replacement, replacementBegin,
   2040                        currentDollar, replacementEnd, &sub, &skip,
   2041                        &currentNamedCapture)) {
   2042      if (sub.length > skip) {
   2043        replen += sub.length - skip;
   2044      } else {
   2045        replen -= skip - sub.length;
   2046      }
   2047      currentDollar += skip;
   2048    } else {
   2049      currentDollar++;
   2050    }
   2051 
   2052    currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
   2053  } while (currentDollar);
   2054 
   2055  if (!replen.isValid()) {
   2056    ReportAllocationOverflow(cx);
   2057    return false;
   2058  }
   2059 
   2060  *sizep = replen.value();
   2061  return true;
   2062 }
   2063 
   2064 static bool FindReplaceLength(JSContext* cx, Handle<JSLinearString*> matched,
   2065                              Handle<JSLinearString*> string, size_t position,
   2066                              size_t tailPos, Handle<CapturesVector> captures,
   2067                              Handle<CapturesVector> namedCaptures,
   2068                              Handle<JSLinearString*> replacement,
   2069                              size_t firstDollarIndex, size_t* sizep) {
   2070  return replacement->hasLatin1Chars()
   2071             ? FindReplaceLengthString<Latin1Char>(
   2072                   cx, matched, string, position, tailPos, captures,
   2073                   namedCaptures, replacement, firstDollarIndex, sizep)
   2074             : FindReplaceLengthString<char16_t>(
   2075                   cx, matched, string, position, tailPos, captures,
   2076                   namedCaptures, replacement, firstDollarIndex, sizep);
   2077 }
   2078 
   2079 /*
   2080 * Precondition: |sb| already has necessary growth space reserved (as
   2081 * derived from FindReplaceLength), and has been inflated to TwoByte if
   2082 * necessary.
   2083 */
   2084 template <typename CharT>
   2085 static void DoReplace(Handle<JSLinearString*> matched,
   2086                      Handle<JSLinearString*> string, size_t position,
   2087                      size_t tailPos, Handle<CapturesVector> captures,
   2088                      Handle<CapturesVector> namedCaptures,
   2089                      Handle<JSLinearString*> replacement,
   2090                      size_t firstDollarIndex, StringBuilder& sb) {
   2091  JS::AutoCheckCannotGC nogc;
   2092  const CharT* replacementBegin = replacement->chars<CharT>(nogc);
   2093  const CharT* currentChar = replacementBegin;
   2094 
   2095  MOZ_ASSERT(firstDollarIndex < replacement->length());
   2096  const CharT* currentDollar = replacementBegin + firstDollarIndex;
   2097  const CharT* replacementEnd = replacementBegin + replacement->length();
   2098  uint32_t currentNamedCapture = 0;
   2099  do {
   2100    /* Move one of the constant portions of the replacement value. */
   2101    size_t len = currentDollar - currentChar;
   2102    sb.infallibleAppend(currentChar, len);
   2103    currentChar = currentDollar;
   2104 
   2105    JSSubString sub;
   2106    size_t skip;
   2107    if (InterpretDollar(matched, string, position, tailPos, captures,
   2108                        namedCaptures, replacement, replacementBegin,
   2109                        currentDollar, replacementEnd, &sub, &skip,
   2110                        &currentNamedCapture)) {
   2111      sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
   2112      currentChar += skip;
   2113      currentDollar += skip;
   2114    } else {
   2115      currentDollar++;
   2116    }
   2117 
   2118    currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
   2119  } while (currentDollar);
   2120  sb.infallibleAppend(currentChar,
   2121                      replacement->length() - (currentChar - replacementBegin));
   2122 }
   2123 
   2124 /*
   2125 * This function finds the list of named captures of the form
   2126 * "$<name>" in a replacement string and converts them into jsids, for
   2127 * use in InitNamedReplacements.
   2128 */
   2129 template <typename CharT>
   2130 static bool CollectNames(JSContext* cx, Handle<JSLinearString*> replacement,
   2131                         size_t firstDollarIndex,
   2132                         MutableHandle<GCVector<jsid>> names) {
   2133  JS::AutoCheckCannotGC nogc;
   2134  MOZ_ASSERT(firstDollarIndex < replacement->length());
   2135 
   2136  const CharT* replacementBegin = replacement->chars<CharT>(nogc);
   2137  const CharT* currentDollar = replacementBegin + firstDollarIndex;
   2138  const CharT* replacementEnd = replacementBegin + replacement->length();
   2139 
   2140  // https://tc39.es/ecma262/#table-45, "$<" section
   2141  while (currentDollar && currentDollar + 1 < replacementEnd) {
   2142    if (currentDollar[1] == '<') {
   2143      // Step 2.b
   2144      const CharT* nameStart = currentDollar + 2;
   2145      const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
   2146 
   2147      // Step 2.c
   2148      if (!nameEnd) {
   2149        return true;
   2150      }
   2151 
   2152      // Step 2.d.i
   2153      size_t nameLength = nameEnd - nameStart;
   2154      JSAtom* atom = AtomizeChars(cx, nameStart, nameLength);
   2155      if (!atom || !names.append(AtomToId(atom))) {
   2156        return false;
   2157      }
   2158      currentDollar = nameEnd + 1;
   2159    } else {
   2160      currentDollar += 2;
   2161    }
   2162    currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
   2163  }
   2164  return true;
   2165 }
   2166 
   2167 /*
   2168 * When replacing named captures, the spec requires us to perform
   2169 * `Get(match.groups, name)` for each "$<name>". These `Get`s can be
   2170 * script-visible; for example, RegExp can be extended with an `exec`
   2171 * method that wraps `groups` in a proxy. To make sure that we do the
   2172 * right thing, if a regexp has named captures, we find the named
   2173 * capture replacements before beginning the actual replacement.
   2174 * This guarantees that we will call GetProperty once and only once for
   2175 * each "$<name>" in the replacement string, in the correct order.
   2176 *
   2177 * This function precomputes the results of step 2 of the '$<' case
   2178 * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so
   2179 * that when we need to access the nth named capture in InterpretDollar,
   2180 * we can just use the nth value stored in namedCaptures.
   2181 */
   2182 static bool InitNamedCaptures(JSContext* cx,
   2183                              Handle<JSLinearString*> replacement,
   2184                              HandleObject groups, size_t firstDollarIndex,
   2185                              MutableHandle<CapturesVector> namedCaptures) {
   2186  Rooted<GCVector<jsid>> names(cx, cx);
   2187  if (replacement->hasLatin1Chars()) {
   2188    if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) {
   2189      return false;
   2190    }
   2191  } else {
   2192    if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) {
   2193      return false;
   2194    }
   2195  }
   2196 
   2197  // https://tc39.es/ecma262/#table-45, "$<" section
   2198  RootedId id(cx);
   2199  RootedValue capture(cx);
   2200  for (uint32_t i = 0; i < names.length(); i++) {
   2201    // Step 2.d.i
   2202    id = names[i];
   2203 
   2204    // Step 2.d.ii
   2205    if (!GetProperty(cx, groups, groups, id, &capture)) {
   2206      return false;
   2207    }
   2208 
   2209    // Step 2.d.iii
   2210    if (capture.isUndefined()) {
   2211      if (!namedCaptures.append(capture)) {
   2212        return false;
   2213      }
   2214    } else {
   2215      // Step 2.d.iv
   2216      JSString* str = ToString<CanGC>(cx, capture);
   2217      if (!str) {
   2218        return false;
   2219      }
   2220      JSLinearString* linear = str->ensureLinear(cx);
   2221      if (!linear) {
   2222        return false;
   2223      }
   2224      if (!namedCaptures.append(StringValue(linear))) {
   2225        return false;
   2226      }
   2227    }
   2228  }
   2229 
   2230  return true;
   2231 }
   2232 
   2233 static bool NeedTwoBytes(Handle<JSLinearString*> string,
   2234                         Handle<JSLinearString*> replacement,
   2235                         Handle<JSLinearString*> matched,
   2236                         Handle<CapturesVector> captures,
   2237                         Handle<CapturesVector> namedCaptures) {
   2238  if (string->hasTwoByteChars()) {
   2239    return true;
   2240  }
   2241  if (replacement->hasTwoByteChars()) {
   2242    return true;
   2243  }
   2244  if (matched->hasTwoByteChars()) {
   2245    return true;
   2246  }
   2247 
   2248  for (const Value& capture : captures) {
   2249    if (capture.isUndefined()) {
   2250      continue;
   2251    }
   2252    if (capture.toString()->hasTwoByteChars()) {
   2253      return true;
   2254    }
   2255  }
   2256 
   2257  for (const Value& capture : namedCaptures) {
   2258    if (capture.isUndefined()) {
   2259      continue;
   2260    }
   2261    if (capture.toString()->hasTwoByteChars()) {
   2262      return true;
   2263    }
   2264  }
   2265 
   2266  return false;
   2267 }
   2268 
   2269 // ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e
   2270 //
   2271 // 22.2.7.2 RegExpBuiltinExec ( R, S )
   2272 // https://tc39.es/ecma262/#sec-regexpbuiltinexec
   2273 //
   2274 // If `forTest` is true, this is called from `RegExp.prototype.test` and we can
   2275 // avoid allocating a result object.
   2276 bool js::RegExpBuiltinExec(JSContext* cx, Handle<RegExpObject*> regexp,
   2277                           Handle<JSString*> string, bool forTest,
   2278                           MutableHandle<Value> rval) {
   2279  // Step 2.
   2280  uint64_t lastIndex;
   2281  if (MOZ_LIKELY(regexp->getLastIndex().isInt32())) {
   2282    lastIndex = std::max(regexp->getLastIndex().toInt32(), 0);
   2283  } else {
   2284    Rooted<Value> lastIndexVal(cx, regexp->getLastIndex());
   2285    if (!ToLength(cx, lastIndexVal, &lastIndex)) {
   2286      return false;
   2287    }
   2288  }
   2289 
   2290  // Steps 3-5.
   2291  bool globalOrSticky = regexp->isGlobalOrSticky();
   2292 
   2293  // Step 7.
   2294  if (!globalOrSticky) {
   2295    lastIndex = 0;
   2296  } else {
   2297    // Steps 1, 13.a.
   2298    if (lastIndex > string->length()) {
   2299      if (!SetLastIndex(cx, regexp, 0)) {
   2300        return false;
   2301      }
   2302      rval.set(forTest ? BooleanValue(false) : NullValue());
   2303      return true;
   2304    }
   2305  }
   2306 
   2307  MOZ_ASSERT(lastIndex <= string->length());
   2308  static_assert(JSString::MAX_LENGTH <= INT32_MAX, "lastIndex fits in int32_t");
   2309 
   2310  // Steps 6, 8-35.
   2311  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
   2312  if (!res) {
   2313    return false;
   2314  }
   2315 
   2316  if (forTest) {
   2317    bool result;
   2318    if (!RegExpBuiltinExecTestRaw<false>(cx, regexp, string, int32_t(lastIndex),
   2319                                         &result)) {
   2320      return false;
   2321    }
   2322 
   2323    rval.setBoolean(result);
   2324    return true;
   2325  }
   2326 
   2327  return RegExpBuiltinExecMatchRaw<false>(cx, regexp, string,
   2328                                          int32_t(lastIndex), nullptr, rval);
   2329 }
   2330 
   2331 bool js::IsOptimizableRegExpObject(JSObject* obj, JSContext* cx) {
   2332  // Check the shape to ensure this is a plain RegExpObject with this realm's
   2333  // RegExp.prototype as prototype and without any extra own properties.
   2334  // The fuse check ensures RegExp.prototype is optimizable.
   2335  bool optimizable =
   2336      obj->shape() == cx->global()->maybeRegExpShapeWithDefaultProto() &&
   2337      cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact();
   2338  MOZ_ASSERT_IF(optimizable,
   2339                obj->is<RegExpObject>() &&
   2340                    obj->as<RegExpObject>().realm() == cx->realm());
   2341  return optimizable;
   2342 }
   2343 
   2344 // ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e
   2345 //
   2346 // 22.2.7.1 RegExpExec ( R, S )
   2347 // https://tc39.es/ecma262/#sec-regexpexec
   2348 //
   2349 // If `forTest` is true, this is called from `RegExp.prototype.test` and we can
   2350 // avoid allocating a result object.
   2351 bool js::RegExpExec(JSContext* cx, Handle<JSObject*> regexp,
   2352                    Handle<JSString*> string, bool forTest,
   2353                    MutableHandle<Value> rval) {
   2354  // Fast path for the case where `regexp` is a regular expression object with
   2355  // the builtin `RegExp.prototype.exec` function.
   2356  if (MOZ_LIKELY(IsOptimizableRegExpObject(regexp, cx))) {
   2357    return RegExpBuiltinExec(cx, regexp.as<RegExpObject>(), string, forTest,
   2358                             rval);
   2359  }
   2360 
   2361  // Step 1.
   2362  Rooted<Value> exec(cx);
   2363  Rooted<PropertyKey> execKey(cx, NameToId(cx->names().exec));
   2364  if (!GetProperty(cx, regexp, regexp, execKey, &exec)) {
   2365    return false;
   2366  }
   2367 
   2368  // Step 2.
   2369  // If exec is the original RegExp.prototype.exec, use the same, faster,
   2370  // path as for the case where exec isn't callable.
   2371  PropertyName* execName = cx->names().RegExp_prototype_Exec;
   2372  if (IsSelfHostedFunctionWithName(exec, execName) || !IsCallable(exec)) {
   2373    // Steps 3-4.
   2374    if (MOZ_LIKELY(regexp->is<RegExpObject>())) {
   2375      return RegExpBuiltinExec(cx, regexp.as<RegExpObject>(), string, forTest,
   2376                               rval);
   2377    }
   2378 
   2379    // Throw an exception if it's not a wrapped RegExpObject that we can safely
   2380    // unwrap.
   2381    if (!regexp->canUnwrapAs<RegExpObject>()) {
   2382      Rooted<Value> thisv(cx, ObjectValue(*regexp));
   2383      return ReportIncompatibleSelfHostedMethod(
   2384          cx, thisv, IncompatibleContext::RegExpExec);
   2385    }
   2386 
   2387    // Call RegExpBuiltinExec in the regular expression's realm.
   2388    Rooted<RegExpObject*> unwrapped(cx, &regexp->unwrapAs<RegExpObject>());
   2389    {
   2390      AutoRealm ar(cx, unwrapped);
   2391      Rooted<JSString*> wrappedString(cx, string);
   2392      if (!cx->compartment()->wrap(cx, &wrappedString)) {
   2393        return false;
   2394      }
   2395      if (!RegExpBuiltinExec(cx, unwrapped, wrappedString, forTest, rval)) {
   2396        return false;
   2397      }
   2398    }
   2399    return cx->compartment()->wrap(cx, rval);
   2400  }
   2401 
   2402  // Step 2.a.
   2403  Rooted<Value> thisv(cx, ObjectValue(*regexp));
   2404  FixedInvokeArgs<1> args(cx);
   2405  args[0].setString(string);
   2406  if (!js::Call(cx, exec, thisv, args, rval, CallReason::CallContent)) {
   2407    return false;
   2408  }
   2409 
   2410  // Step 2.b.
   2411  if (!rval.isObjectOrNull()) {
   2412    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   2413                              JSMSG_EXEC_NOT_OBJORNULL);
   2414    return false;
   2415  }
   2416 
   2417  // Step 2.c.
   2418  if (forTest) {
   2419    rval.setBoolean(rval.isObject());
   2420  }
   2421  return true;
   2422 }
   2423 
   2424 bool js::RegExpHasCaptureGroups(JSContext* cx, Handle<RegExpObject*> obj,
   2425                                Handle<JSString*> input, bool* result) {
   2426  // pairCount is only available for compiled regular expressions.
   2427  if (!obj->hasShared() ||
   2428      obj->getShared()->kind() == RegExpShared::Kind::Unparsed) {
   2429    Rooted<RegExpShared*> shared(cx, RegExpObject::getShared(cx, obj));
   2430    if (!shared) {
   2431      return false;
   2432    }
   2433    Rooted<JSLinearString*> inputLinear(cx, input->ensureLinear(cx));
   2434    if (!inputLinear) {
   2435      return false;
   2436    }
   2437    if (!RegExpShared::compileIfNecessary(cx, &shared, inputLinear,
   2438                                          RegExpShared::CodeKind::Any)) {
   2439      return false;
   2440    }
   2441  }
   2442 
   2443  MOZ_ASSERT(obj->getShared()->pairCount() >= 1);
   2444 
   2445  *result = obj->getShared()->pairCount() > 1;
   2446  return true;
   2447 }
   2448 
   2449 /* ES 2021 21.1.3.17.1 */
   2450 // https://tc39.es/ecma262/#sec-getsubstitution
   2451 bool js::RegExpGetSubstitution(JSContext* cx, Handle<ArrayObject*> matchResult,
   2452                               Handle<JSLinearString*> string, size_t position,
   2453                               Handle<JSLinearString*> replacement,
   2454                               size_t firstDollarIndex, HandleValue groups,
   2455                               MutableHandleValue rval) {
   2456  MOZ_ASSERT(firstDollarIndex < replacement->length());
   2457 
   2458  // Step 1 (skipped).
   2459 
   2460  // Step 10 (reordered).
   2461  uint32_t matchResultLength = matchResult->length();
   2462  MOZ_ASSERT(matchResultLength > 0);
   2463  MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
   2464 
   2465  const Value& matchedValue = matchResult->getDenseElement(0);
   2466  Rooted<JSLinearString*> matched(cx,
   2467                                  matchedValue.toString()->ensureLinear(cx));
   2468  if (!matched) {
   2469    return false;
   2470  }
   2471 
   2472  // Step 2.
   2473  size_t matchLength = matched->length();
   2474 
   2475  // Steps 3-5 (skipped).
   2476 
   2477  // Step 6.
   2478  MOZ_ASSERT(position <= string->length());
   2479 
   2480  uint32_t nCaptures = matchResultLength - 1;
   2481  Rooted<CapturesVector> captures(cx, CapturesVector(cx));
   2482  if (!captures.reserve(nCaptures)) {
   2483    return false;
   2484  }
   2485 
   2486  // Step 7.
   2487  for (uint32_t i = 1; i <= nCaptures; i++) {
   2488    const Value& capture = matchResult->getDenseElement(i);
   2489 
   2490    if (capture.isUndefined()) {
   2491      captures.infallibleAppend(capture);
   2492      continue;
   2493    }
   2494 
   2495    JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
   2496    if (!captureLinear) {
   2497      return false;
   2498    }
   2499    captures.infallibleAppend(StringValue(captureLinear));
   2500  }
   2501 
   2502  Rooted<CapturesVector> namedCaptures(cx, cx);
   2503  if (groups.isObject()) {
   2504    RootedObject groupsObj(cx, &groups.toObject());
   2505    if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex,
   2506                           &namedCaptures)) {
   2507      return false;
   2508    }
   2509  } else {
   2510    MOZ_ASSERT(groups.isUndefined());
   2511  }
   2512 
   2513  // Step 8 (skipped).
   2514 
   2515  // Step 9.
   2516  CheckedInt<uint32_t> checkedTailPos(0);
   2517  checkedTailPos += position;
   2518  checkedTailPos += matchLength;
   2519  if (!checkedTailPos.isValid()) {
   2520    ReportAllocationOverflow(cx);
   2521    return false;
   2522  }
   2523  uint32_t tailPos = checkedTailPos.value();
   2524 
   2525  // Step 11.
   2526  size_t reserveLength;
   2527  if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
   2528                         namedCaptures, replacement, firstDollarIndex,
   2529                         &reserveLength)) {
   2530    return false;
   2531  }
   2532 
   2533  JSStringBuilder result(cx);
   2534  if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) {
   2535    if (!result.ensureTwoByteChars()) {
   2536      return false;
   2537    }
   2538  }
   2539 
   2540  if (!result.reserve(reserveLength)) {
   2541    return false;
   2542  }
   2543 
   2544  if (replacement->hasLatin1Chars()) {
   2545    DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
   2546                          namedCaptures, replacement, firstDollarIndex, result);
   2547  } else {
   2548    DoReplace<char16_t>(matched, string, position, tailPos, captures,
   2549                        namedCaptures, replacement, firstDollarIndex, result);
   2550  }
   2551 
   2552  // Step 12.
   2553  JSString* resultString = result.finishString();
   2554  if (!resultString) {
   2555    return false;
   2556  }
   2557 
   2558  rval.setString(resultString);
   2559  return true;
   2560 }
   2561 
   2562 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
   2563  CallArgs args = CallArgsFromVp(argc, vp);
   2564  MOZ_ASSERT(args.length() == 1);
   2565  JSString* str = args[0].toString();
   2566 
   2567  // Should be handled in different path.
   2568  MOZ_ASSERT(str->length() != 0);
   2569 
   2570  int32_t index = -1;
   2571  if (!GetFirstDollarIndexRaw(cx, str, &index)) {
   2572    return false;
   2573  }
   2574 
   2575  args.rval().setInt32(index);
   2576  return true;
   2577 }
   2578 
   2579 template <typename TextChar>
   2580 static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
   2581                                                     uint32_t textLen) {
   2582  const TextChar* end = text + textLen;
   2583  for (const TextChar* c = text; c != end; ++c) {
   2584    if (*c == '$') {
   2585      return c - text;
   2586    }
   2587  }
   2588  return -1;
   2589 }
   2590 
   2591 template <typename StringT>
   2592 int32_t js::GetFirstDollarIndexRawFlat(const StringT* text) {
   2593  uint32_t len = text->length();
   2594 
   2595  JS::AutoCheckCannotGC nogc;
   2596  if (text->hasLatin1Chars()) {
   2597    return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len);
   2598  }
   2599 
   2600  return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len);
   2601 }
   2602 
   2603 template int32_t js::GetFirstDollarIndexRawFlat<JSLinearString>(
   2604    const JSLinearString* text);
   2605 template int32_t js::GetFirstDollarIndexRawFlat<JSOffThreadAtom>(
   2606    const JSOffThreadAtom* text);
   2607 
   2608 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
   2609  JSLinearString* text = str->ensureLinear(cx);
   2610  if (!text) {
   2611    return false;
   2612  }
   2613 
   2614  *index = GetFirstDollarIndexRawFlat(text);
   2615  return true;
   2616 }
   2617 
   2618 bool js::IsRegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
   2619  // This can only be called from self-hosted code.
   2620  CallArgs args = CallArgsFromVp(argc, vp);
   2621  MOZ_ASSERT(args.length() == 0);
   2622 
   2623  bool optimizable =
   2624      cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact();
   2625  args.rval().setBoolean(optimizable);
   2626  return true;
   2627 }
   2628 
   2629 bool js::IsOptimizableRegExpObject(JSContext* cx, unsigned argc, Value* vp) {
   2630  // This can only be called from self-hosted code.
   2631  CallArgs args = CallArgsFromVp(argc, vp);
   2632  MOZ_ASSERT(args.length() == 1);
   2633  MOZ_ASSERT(args[0].isObject());
   2634 
   2635  JSObject* obj = &args[0].toObject();
   2636 
   2637  bool optimizable = IsOptimizableRegExpObject(obj, cx);
   2638  args.rval().setBoolean(optimizable);
   2639  return true;
   2640 }
   2641 
   2642 /*
   2643 * Pattern match the script to check if it is is indexing into a particular
   2644 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
   2645 * such cases, which are used by javascript packers (particularly the popular
   2646 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
   2647 * code patterns generated by such packers here.
   2648 */
   2649 bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc,
   2650                                        Value* vp) {
   2651  // This can only be called from self-hosted code.
   2652  CallArgs args = CallArgsFromVp(argc, vp);
   2653  MOZ_ASSERT(args.length() == 1);
   2654 
   2655  JSObject& lambda = args[0].toObject();
   2656  args.rval().setUndefined();
   2657 
   2658  if (!lambda.is<JSFunction>()) {
   2659    return true;
   2660  }
   2661 
   2662  RootedFunction fun(cx, &lambda.as<JSFunction>());
   2663  if (!fun->isInterpreted() || fun->isClassConstructor()) {
   2664    return true;
   2665  }
   2666 
   2667  JSScript* script = JSFunction::getOrCreateScript(cx, fun);
   2668  if (!script) {
   2669    return false;
   2670  }
   2671 
   2672  jsbytecode* pc = script->code();
   2673 
   2674  /*
   2675   * JSOp::GetAliasedVar tells us exactly where to find the base object 'b'.
   2676   * Rule out the (unlikely) possibility of a function with environment
   2677   * objects since it would make our environment walk off.
   2678   */
   2679  if (JSOp(*pc) != JSOp::GetAliasedVar || fun->needsSomeEnvironmentObject()) {
   2680    return true;
   2681  }
   2682  EnvironmentCoordinate ec(pc);
   2683  EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>();
   2684  for (unsigned i = 0; i < ec.hops(); ++i) {
   2685    env = &env->enclosingEnvironment().as<EnvironmentObject>();
   2686  }
   2687  Value b = env->aliasedBinding(ec);
   2688  pc += JSOpLength_GetAliasedVar;
   2689 
   2690  /* Look for 'a' to be the lambda's first argument. */
   2691  if (JSOp(*pc) != JSOp::GetArg || GET_ARGNO(pc) != 0) {
   2692    return true;
   2693  }
   2694  pc += JSOpLength_GetArg;
   2695 
   2696  /* 'b[a]' */
   2697  if (JSOp(*pc) != JSOp::GetElem) {
   2698    return true;
   2699  }
   2700  pc += JSOpLength_GetElem;
   2701 
   2702  /* 'return b[a]' */
   2703  if (JSOp(*pc) != JSOp::Return) {
   2704    return true;
   2705  }
   2706 
   2707  /* 'b' must behave like a normal object. */
   2708  if (!b.isObject()) {
   2709    return true;
   2710  }
   2711 
   2712  JSObject& bobj = b.toObject();
   2713  const JSClass* clasp = bobj.getClass();
   2714  if (!clasp->isNativeObject() || clasp->getOpsLookupProperty() ||
   2715      clasp->getOpsGetProperty()) {
   2716    return true;
   2717  }
   2718 
   2719  args.rval().setObject(bobj);
   2720  return true;
   2721 }
   2722 
   2723 /*
   2724 * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda.
   2725 * It returns the property value only if the property is data property and the
   2726 * property value is a string.  Otherwise it returns undefined.
   2727 */
   2728 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
   2729                                         Value* vp) {
   2730  CallArgs args = CallArgsFromVp(argc, vp);
   2731  MOZ_ASSERT(args.length() == 2);
   2732 
   2733  JSObject* obj = &args[0].toObject();
   2734  if (!obj->is<NativeObject>()) {
   2735    // The object is already checked to be native in GetElemBaseForLambda,
   2736    // but it can be swapped to another class that is non-native.
   2737    // Return undefined to mark failure to get the property.
   2738    args.rval().setUndefined();
   2739    return true;
   2740  }
   2741 
   2742  // No need to root |obj| because |AtomizeString| can't GC.
   2743  JS::AutoCheckCannotGC nogc;
   2744 
   2745  JSAtom* atom = AtomizeString(cx, args[1].toString());
   2746  if (!atom) {
   2747    return false;
   2748  }
   2749 
   2750  Value v;
   2751  if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString()) {
   2752    args.rval().set(v);
   2753  } else {
   2754    args.rval().setUndefined();
   2755  }
   2756 
   2757  return true;
   2758 }