[ tor-browser ].git.dasho

String.cpp (143872B)
      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "builtin/String.h"
      8 
      9 #include "mozilla/Attributes.h"
     10 #include "mozilla/CheckedInt.h"
     11 #include "mozilla/Compiler.h"
     12 #if JS_HAS_INTL_API
     13 #  include "mozilla/intl/Locale.h"
     14 #  include "mozilla/intl/String.h"
     15 #endif
     16 #include "mozilla/Likely.h"
     17 #include "mozilla/Maybe.h"
     18 #include "mozilla/PodOperations.h"
     19 #include "mozilla/Range.h"
     20 #include "mozilla/SIMD.h"
     21 #include "mozilla/TextUtils.h"
     22 
     23 #include <algorithm>
     24 #include <limits>
     25 #include <string.h>
     26 #include <type_traits>
     27 
     28 #include "jsnum.h"
     29 #include "jstypes.h"
     30 
     31 #include "builtin/Array.h"
     32 #if JS_HAS_INTL_API
     33 #  include "builtin/intl/Collator.h"
     34 #  include "builtin/intl/CommonFunctions.h"
     35 #  include "builtin/intl/FormatBuffer.h"
     36 #  include "builtin/intl/GlobalIntlData.h"
     37 #  include "builtin/intl/LocaleNegotiation.h"
     38 #endif
     39 #include "builtin/RegExp.h"
     40 #include "gc/GC.h"
     41 #include "jit/InlinableNatives.h"
     42 #include "js/Conversions.h"
     43 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
     44 #if !JS_HAS_INTL_API
     45 #  include "js/LocaleSensitive.h"
     46 #endif
     47 #include "js/Prefs.h"
     48 #include "js/Printer.h"
     49 #include "js/PropertyAndElement.h"  // JS_DefineFunctions
     50 #include "js/PropertySpec.h"
     51 #include "js/StableStringChars.h"
     52 #include "js/UniquePtr.h"
     53 #include "util/StringBuilder.h"
     54 #include "util/Unicode.h"
     55 #include "vm/GlobalObject.h"
     56 #include "vm/JSContext.h"
     57 #include "vm/JSObject.h"
     58 #include "vm/RegExpObject.h"
     59 #include "vm/SelfHosting.h"
     60 #include "vm/StaticStrings.h"
     61 #include "vm/ToSource.h"  // js::ValueToSource
     62 
     63 #include "vm/GeckoProfiler-inl.h"
     64 #include "vm/NativeObject-inl.h"
     65 #include "vm/StringObject-inl.h"
     66 #include "vm/StringType-inl.h"
     67 
     68 using namespace js;
     69 
     70 using mozilla::AsciiAlphanumericToNumber;
     71 using mozilla::CheckedInt;
     72 using mozilla::EnsureUtf16ValiditySpan;
     73 using mozilla::IsAsciiHexDigit;
     74 using mozilla::PodCopy;
     75 using mozilla::RangedPtr;
     76 using mozilla::SIMD;
     77 using mozilla::Span;
     78 using mozilla::Utf16ValidUpTo;
     79 
     80 using JS::AutoCheckCannotGC;
     81 using JS::AutoStableStringChars;
     82 
     83 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
     84                                         unsigned argno) {
     85  if (argno >= args.length()) {
     86    return cx->names().undefined;
     87  }
     88 
     89  JSString* str = ToString<CanGC>(cx, args[argno]);
     90  if (!str) {
     91    return nullptr;
     92  }
     93 
     94  return str->ensureLinear(cx);
     95 }
     96 
     97 /*
     98 * Forward declarations for URI encode/decode and helper routines
     99 */
    100 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
    101 
    102 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
    103 
    104 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
    105 
    106 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
    107 
    108 /*
    109 * Global string methods
    110 */
    111 
    112 /* ES5 B.2.1 */
    113 template <typename CharT>
    114 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
    115                   StringChars<Latin1Char>& newChars, uint32_t* newLengthOut) {
    116  // clang-format off
    117    static const uint8_t shouldPassThrough[128] = {
    118         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    119         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    120         0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
    121         1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
    122         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
    123         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
    124         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
    125         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,       /*   pqrstuvwxyz{\}~  DEL */
    126    };
    127  // clang-format on
    128 
    129  /* Take a first pass and see how big the result string will need to be. */
    130  uint32_t newLength = length;
    131  for (size_t i = 0; i < length; i++) {
    132    char16_t ch = chars[i];
    133    if (ch < 128 && shouldPassThrough[ch]) {
    134      continue;
    135    }
    136 
    137    /*
    138     * newlength is incremented below by at most 5 and at this point it must
    139     * be a valid string length, so this should never overflow uint32_t.
    140     */
    141    static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
    142                  "Adding 5 to valid string length should not overflow");
    143 
    144    MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
    145 
    146    /* The character will be encoded as %XX or %uXXXX. */
    147    newLength += (ch < 256) ? 2 : 5;
    148 
    149    if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
    150      ReportAllocationOverflow(cx);
    151      return false;
    152    }
    153  }
    154 
    155  if (newLength == length) {
    156    *newLengthOut = newLength;
    157    return true;
    158  }
    159 
    160  if (!newChars.maybeAlloc(cx, newLength)) {
    161    return false;
    162  }
    163 
    164  static const char digits[] = "0123456789ABCDEF";
    165 
    166  JS::AutoCheckCannotGC nogc;
    167  Latin1Char* rawNewChars = newChars.data(nogc);
    168  size_t i, ni;
    169  for (i = 0, ni = 0; i < length; i++) {
    170    char16_t ch = chars[i];
    171    if (ch < 128 && shouldPassThrough[ch]) {
    172      rawNewChars[ni++] = ch;
    173    } else if (ch < 256) {
    174      rawNewChars[ni++] = '%';
    175      rawNewChars[ni++] = digits[ch >> 4];
    176      rawNewChars[ni++] = digits[ch & 0xF];
    177    } else {
    178      rawNewChars[ni++] = '%';
    179      rawNewChars[ni++] = 'u';
    180      rawNewChars[ni++] = digits[ch >> 12];
    181      rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
    182      rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
    183      rawNewChars[ni++] = digits[ch & 0xF];
    184    }
    185  }
    186  MOZ_ASSERT(ni == newLength);
    187 
    188  *newLengthOut = newLength;
    189  return true;
    190 }
    191 
    192 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
    193  AutoJSMethodProfilerEntry pseudoFrame(cx, "escape");
    194  CallArgs args = CallArgsFromVp(argc, vp);
    195 
    196  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
    197  if (!str) {
    198    return false;
    199  }
    200 
    201  StringChars<Latin1Char> newChars(cx);
    202  uint32_t newLength = 0;  // initialize to silence GCC warning
    203  if (str->hasLatin1Chars()) {
    204    AutoCheckCannotGC nogc;
    205    if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
    206                &newLength)) {
    207      return false;
    208    }
    209  } else {
    210    AutoCheckCannotGC nogc;
    211    if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
    212                &newLength)) {
    213      return false;
    214    }
    215  }
    216 
    217  // Return input if no characters need to be escaped.
    218  if (newLength == str->length()) {
    219    args.rval().setString(str);
    220    return true;
    221  }
    222 
    223  JSString* res = newChars.toStringDontDeflateNonStatic<CanGC>(cx, newLength);
    224  if (!res) {
    225    return false;
    226  }
    227 
    228  args.rval().setString(res);
    229  return true;
    230 }
    231 
    232 template <typename CharT>
    233 static inline bool Unhex4(const RangedPtr<const CharT> chars,
    234                          char16_t* result) {
    235  CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
    236 
    237  if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
    238        IsAsciiHexDigit(d))) {
    239    return false;
    240  }
    241 
    242  char16_t unhex = AsciiAlphanumericToNumber(a);
    243  unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
    244  unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
    245  unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
    246  *result = unhex;
    247  return true;
    248 }
    249 
    250 template <typename CharT>
    251 static inline bool Unhex2(const RangedPtr<const CharT> chars,
    252                          char16_t* result) {
    253  CharT a = chars[0], b = chars[1];
    254 
    255  if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
    256    return false;
    257  }
    258 
    259  *result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
    260  return true;
    261 }
    262 
    263 template <typename CharT>
    264 static bool Unescape(StringBuilder& sb,
    265                     const mozilla::Range<const CharT> chars) {
    266  // Step 2.
    267  uint32_t length = chars.length();
    268 
    269  /*
    270   * Note that the spec algorithm has been optimized to avoid building
    271   * a string in the case where no escapes are present.
    272   */
    273  bool building = false;
    274 
    275 #define ENSURE_BUILDING                            \
    276  do {                                             \
    277    if (!building) {                               \
    278      building = true;                             \
    279      if (!sb.reserve(length)) return false;       \
    280      sb.infallibleAppend(chars.begin().get(), k); \
    281    }                                              \
    282  } while (false);
    283 
    284  // Step 4.
    285  uint32_t k = 0;
    286 
    287  // Step 5.
    288  while (k < length) {
    289    // Step 5.a.
    290    char16_t c = chars[k];
    291 
    292    // Step 5.b.
    293    if (c == '%') {
    294      static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
    295                    "String length is not near UINT32_MAX");
    296 
    297      // Steps 5.b.i-ii.
    298      if (k + 6 <= length && chars[k + 1] == 'u') {
    299        if (Unhex4(chars.begin() + k + 2, &c)) {
    300          ENSURE_BUILDING
    301          k += 5;
    302        }
    303      } else if (k + 3 <= length) {
    304        if (Unhex2(chars.begin() + k + 1, &c)) {
    305          ENSURE_BUILDING
    306          k += 2;
    307        }
    308      }
    309    }
    310 
    311    // Step 5.c.
    312    if (building && !sb.append(c)) {
    313      return false;
    314    }
    315 
    316    // Step 5.d.
    317    k += 1;
    318  }
    319 
    320  return true;
    321 #undef ENSURE_BUILDING
    322 }
    323 
    324 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
    325 // B.2.1.2 unescape ( string )
    326 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
    327  AutoJSMethodProfilerEntry pseudoFrame(cx, "unescape");
    328  CallArgs args = CallArgsFromVp(argc, vp);
    329 
    330  // Step 1.
    331  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
    332  if (!str) {
    333    return false;
    334  }
    335 
    336  // Step 3.
    337  JSStringBuilder sb(cx);
    338  if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
    339    return false;
    340  }
    341 
    342  // Steps 2, 4-5.
    343  bool unescapeFailed = false;
    344  if (str->hasLatin1Chars()) {
    345    AutoCheckCannotGC nogc;
    346    unescapeFailed = !Unescape(sb, str->latin1Range(nogc));
    347  } else {
    348    AutoCheckCannotGC nogc;
    349    unescapeFailed = !Unescape(sb, str->twoByteRange(nogc));
    350  }
    351  if (unescapeFailed) {
    352    return false;
    353  }
    354 
    355  // Step 6.
    356  JSLinearString* result;
    357  if (!sb.empty()) {
    358    result = sb.finishString();
    359    if (!result) {
    360      return false;
    361    }
    362  } else {
    363    result = str;
    364  }
    365 
    366  args.rval().setString(result);
    367  return true;
    368 }
    369 
    370 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
    371  CallArgs args = CallArgsFromVp(argc, vp);
    372  JSString* str = ValueToSource(cx, args.get(0));
    373  if (!str) {
    374    return false;
    375  }
    376 
    377  args.rval().setString(str);
    378  return true;
    379 }
    380 
    381 static const JSFunctionSpec string_functions[] = {
    382    JS_FN("escape", str_escape, 1, JSPROP_RESOLVING),
    383    JS_FN("unescape", str_unescape, 1, JSPROP_RESOLVING),
    384    JS_FN("uneval", str_uneval, 1, JSPROP_RESOLVING),
    385    JS_FN("decodeURI", str_decodeURI, 1, JSPROP_RESOLVING),
    386    JS_FN("encodeURI", str_encodeURI, 1, JSPROP_RESOLVING),
    387    JS_FN("decodeURIComponent", str_decodeURI_Component, 1, JSPROP_RESOLVING),
    388    JS_FN("encodeURIComponent", str_encodeURI_Component, 1, JSPROP_RESOLVING),
    389    JS_FS_END,
    390 };
    391 
    392 static const unsigned STRING_ELEMENT_ATTRS =
    393    JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
    394 
    395 static bool str_enumerate(JSContext* cx, HandleObject obj) {
    396  RootedString str(cx, obj->as<StringObject>().unbox());
    397  js::StaticStrings& staticStrings = cx->staticStrings();
    398 
    399  RootedValue value(cx);
    400  for (size_t i = 0, length = str->length(); i < length; i++) {
    401    JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
    402    if (!str1) {
    403      return false;
    404    }
    405    value.setString(str1);
    406    if (!DefineDataElement(cx, obj, i, value,
    407                           STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
    408      return false;
    409    }
    410  }
    411 
    412  return true;
    413 }
    414 
    415 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
    416  // str_resolve ignores non-integer ids.
    417  return id.isInt();
    418 }
    419 
    420 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
    421                        bool* resolvedp) {
    422  if (!id.isInt()) {
    423    return true;
    424  }
    425 
    426  RootedString str(cx, obj->as<StringObject>().unbox());
    427 
    428  int32_t slot = id.toInt();
    429  if ((size_t)slot < str->length()) {
    430    JSString* str1 =
    431        cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
    432    if (!str1) {
    433      return false;
    434    }
    435    RootedValue value(cx, StringValue(str1));
    436    if (!DefineDataElement(cx, obj, uint32_t(slot), value,
    437                           STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
    438      return false;
    439    }
    440    *resolvedp = true;
    441  }
    442  return true;
    443 }
    444 
    445 static const JSClassOps StringObjectClassOps = {
    446    nullptr,         // addProperty
    447    nullptr,         // delProperty
    448    str_enumerate,   // enumerate
    449    nullptr,         // newEnumerate
    450    str_resolve,     // resolve
    451    str_mayResolve,  // mayResolve
    452    nullptr,         // finalize
    453    nullptr,         // call
    454    nullptr,         // construct
    455    nullptr,         // trace
    456 };
    457 
    458 const JSClass StringObject::class_ = {
    459    "String",
    460    JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
    461        JSCLASS_HAS_CACHED_PROTO(JSProto_String),
    462    &StringObjectClassOps,
    463    &StringObject::classSpec_,
    464 };
    465 
    466 /*
    467 * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
    468 * from nearly all String.prototype.* functions.
    469 */
    470 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
    471    JSContext* cx, const char* funName, HandleValue thisv) {
    472  if (thisv.isString()) {
    473    return thisv.toString();
    474  }
    475 
    476  if (thisv.isObject()) {
    477    if (thisv.toObject().is<StringObject>()) {
    478      StringObject* nobj = &thisv.toObject().as<StringObject>();
    479      // We have to make sure that the ToPrimitive call from ToString
    480      // would be unobservable.
    481      if (HasNoToPrimitiveMethodPure(nobj, cx) &&
    482          HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
    483        return nobj->unbox();
    484      }
    485    }
    486  } else if (thisv.isNullOrUndefined()) {
    487    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    488                              JSMSG_INCOMPATIBLE_PROTO, "String", funName,
    489                              thisv.isNull() ? "null" : "undefined");
    490    return nullptr;
    491  }
    492 
    493  return ToStringSlow<CanGC>(cx, thisv);
    494 }
    495 
    496 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
    497  return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
    498 }
    499 
    500 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
    501  MOZ_ASSERT(IsString(args.thisv()));
    502 
    503  JSString* str = ToString<CanGC>(cx, args.thisv());
    504  if (!str) {
    505    return false;
    506  }
    507 
    508  UniqueChars quoted = QuoteString(cx, str, '"');
    509  if (!quoted) {
    510    return false;
    511  }
    512 
    513  JSStringBuilder sb(cx);
    514  if (!sb.append("(new String(") ||
    515      !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) {
    516    return false;
    517  }
    518 
    519  JSString* result = sb.finishString();
    520  if (!result) {
    521    return false;
    522  }
    523  args.rval().setString(result);
    524  return true;
    525 }
    526 
    527 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
    528  CallArgs args = CallArgsFromVp(argc, vp);
    529  return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
    530 }
    531 
    532 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
    533  MOZ_ASSERT(IsString(args.thisv()));
    534 
    535  args.rval().setString(
    536      args.thisv().isString()
    537          ? args.thisv().toString()
    538          : args.thisv().toObject().as<StringObject>().unbox());
    539  return true;
    540 }
    541 
    542 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
    543  CallArgs args = CallArgsFromVp(argc, vp);
    544  return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
    545 }
    546 
    547 template <typename DestChar, typename SrcChar>
    548 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
    549                             size_t length) {
    550  if constexpr (std::is_same_v<DestChar, SrcChar>) {
    551 #if MOZ_IS_GCC
    552    // Directly call memcpy to work around bug 1863131.
    553    memcpy(destChars, srcChars, length * sizeof(DestChar));
    554 #else
    555    PodCopy(destChars, srcChars, length);
    556 #endif
    557  } else {
    558    for (size_t i = 0; i < length; i++) {
    559      destChars[i] = srcChars[i];
    560    }
    561  }
    562 }
    563 
    564 template <typename CharT>
    565 static inline void CopyChars(CharT* to, const JSLinearString* from,
    566                             size_t begin, size_t length) {
    567  MOZ_ASSERT(begin + length <= from->length());
    568 
    569  JS::AutoCheckCannotGC nogc;
    570  if (from->hasLatin1Chars()) {
    571    CopyChars(to, from->latin1Chars(nogc) + begin, length);
    572  } else {
    573    CopyChars(to, from->twoByteChars(nogc) + begin, length);
    574  }
    575 }
    576 
    577 template <typename CharT>
    578 static JSLinearString* SubstringInlineString(JSContext* cx,
    579                                             Handle<JSLinearString*> left,
    580                                             Handle<JSLinearString*> right,
    581                                             size_t begin, size_t lhsLength,
    582                                             size_t rhsLength) {
    583  constexpr size_t MaxLength = std::is_same_v<CharT, Latin1Char>
    584                                   ? JSFatInlineString::MAX_LENGTH_LATIN1
    585                                   : JSFatInlineString::MAX_LENGTH_TWO_BYTE;
    586 
    587  size_t length = lhsLength + rhsLength;
    588  MOZ_ASSERT(length <= MaxLength, "total length fits in stack chars");
    589  MOZ_ASSERT(JSInlineString::lengthFits<CharT>(length));
    590 
    591  CharT chars[MaxLength] = {};
    592 
    593  CopyChars(chars, left, begin, lhsLength);
    594  CopyChars(chars + lhsLength, right, 0, rhsLength);
    595 
    596  if (auto* str = cx->staticStrings().lookup(chars, length)) {
    597    return str;
    598  }
    599  return NewInlineString<CanGC>(cx, chars, length);
    600 }
    601 
    602 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
    603                              int32_t lengthInt) {
    604  MOZ_ASSERT(0 <= beginInt);
    605  MOZ_ASSERT(0 <= lengthInt);
    606  MOZ_ASSERT(uint32_t(beginInt) <= str->length());
    607  MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
    608 
    609  uint32_t begin = beginInt;
    610  uint32_t len = lengthInt;
    611 
    612  /*
    613   * Optimization for one level deep ropes.
    614   * This is common for the following pattern:
    615   *
    616   * while() {
    617   *   text = text.substr(0, x) + "bla" + text.substr(x)
    618   *   text.charCodeAt(x + 1)
    619   * }
    620   */
    621  if (str->isRope()) {
    622    JSRope* rope = &str->asRope();
    623 
    624    if (rope->length() == len) {
    625      // Substring is the full rope.
    626      MOZ_ASSERT(begin == 0);
    627      return rope;
    628    }
    629 
    630    if (begin + len <= rope->leftChild()->length()) {
    631      // Substring is fully contained within the rope's left child.
    632      return NewDependentString(cx, rope->leftChild(), begin, len);
    633    }
    634 
    635    if (begin >= rope->leftChild()->length()) {
    636      // Substring is fully contained within the rope's right child.
    637      begin -= rope->leftChild()->length();
    638      return NewDependentString(cx, rope->rightChild(), begin, len);
    639    }
    640 
    641    // The substring spans both children. Avoid flattening the rope if the
    642    // children are both linear and the substring fits in an inline string.
    643    //
    644    // Note: we could handle longer substrings by allocating a new rope here,
    645    // but this can result in a lot more rope flattening later on. It's safer to
    646    // flatten the rope in this case. See bug 1922926.
    647 
    648    MOZ_ASSERT(begin < rope->leftChild()->length() &&
    649               begin + len > rope->leftChild()->length());
    650 
    651    bool fitsInline = rope->hasLatin1Chars()
    652                          ? JSInlineString::lengthFits<Latin1Char>(len)
    653                          : JSInlineString::lengthFits<char16_t>(len);
    654    if (fitsInline && rope->leftChild()->isLinear() &&
    655        rope->rightChild()->isLinear()) {
    656      Rooted<JSLinearString*> left(cx, &rope->leftChild()->asLinear());
    657      Rooted<JSLinearString*> right(cx, &rope->rightChild()->asLinear());
    658 
    659      size_t lhsLength = left->length() - begin;
    660      size_t rhsLength = len - lhsLength;
    661 
    662      if (rope->hasLatin1Chars()) {
    663        return SubstringInlineString<Latin1Char>(cx, left, right, begin,
    664                                                 lhsLength, rhsLength);
    665      }
    666      return SubstringInlineString<char16_t>(cx, left, right, begin, lhsLength,
    667                                             rhsLength);
    668    }
    669  }
    670 
    671  return NewDependentString(cx, str, begin, len);
    672 }
    673 
    674 /**
    675 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
    676 * depending on its context:
    677 * When it's preceded by a cased character and not followed by another cased
    678 * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
    679 * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
    680 *
    681 * Unicode 9.0, §3.13 Default Case Algorithms
    682 */
    683 static char16_t Final_Sigma(const char16_t* chars, size_t length,
    684                            size_t index) {
    685  MOZ_ASSERT(index < length);
    686  MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
    687  MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
    688             unicode::GREEK_SMALL_LETTER_SIGMA);
    689 
    690 #if JS_HAS_INTL_API
    691  // Tell the analysis the BinaryProperty.contains function pointer called by
    692  // mozilla::intl::String::Is{CaseIgnorable, Cased} cannot GC.
    693  JS::AutoSuppressGCAnalysis nogc;
    694 
    695  bool precededByCased = false;
    696  for (size_t i = index; i > 0;) {
    697    char16_t c = chars[--i];
    698    char32_t codePoint = c;
    699    if (unicode::IsTrailSurrogate(c) && i > 0) {
    700      char16_t lead = chars[i - 1];
    701      if (unicode::IsLeadSurrogate(lead)) {
    702        codePoint = unicode::UTF16Decode(lead, c);
    703        i--;
    704      }
    705    }
    706 
    707    // Ignore any characters with the property Case_Ignorable.
    708    // NB: We need to skip over all Case_Ignorable characters, even when
    709    // they also have the Cased binary property.
    710    if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
    711      continue;
    712    }
    713 
    714    precededByCased = mozilla::intl::String::IsCased(codePoint);
    715    break;
    716  }
    717  if (!precededByCased) {
    718    return unicode::GREEK_SMALL_LETTER_SIGMA;
    719  }
    720 
    721  bool followedByCased = false;
    722  for (size_t i = index + 1; i < length;) {
    723    char16_t c = chars[i++];
    724    char32_t codePoint = c;
    725    if (unicode::IsLeadSurrogate(c) && i < length) {
    726      char16_t trail = chars[i];
    727      if (unicode::IsTrailSurrogate(trail)) {
    728        codePoint = unicode::UTF16Decode(c, trail);
    729        i++;
    730      }
    731    }
    732 
    733    // Ignore any characters with the property Case_Ignorable.
    734    // NB: We need to skip over all Case_Ignorable characters, even when
    735    // they also have the Cased binary property.
    736    if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
    737      continue;
    738    }
    739 
    740    followedByCased = mozilla::intl::String::IsCased(codePoint);
    741    break;
    742  }
    743  if (!followedByCased) {
    744    return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
    745  }
    746 #endif
    747 
    748  return unicode::GREEK_SMALL_LETTER_SIGMA;
    749 }
    750 
    751 // If |srcLength == destLength| is true, the destination buffer was allocated
    752 // with the same size as the source buffer. When we append characters which
    753 // have special casing mappings, we test |srcLength == destLength| to decide
    754 // if we need to back out and reallocate a sufficiently large destination
    755 // buffer. Otherwise the destination buffer was allocated with the correct
    756 // size to hold all lower case mapped characters, i.e.
    757 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
    758 template <typename CharT>
    759 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
    760                              size_t startIndex, size_t srcLength,
    761                              size_t destLength) {
    762  MOZ_ASSERT(startIndex < srcLength);
    763  MOZ_ASSERT(srcLength <= destLength);
    764  if constexpr (std::is_same_v<CharT, Latin1Char>) {
    765    MOZ_ASSERT(srcLength == destLength);
    766  }
    767 
    768  size_t j = startIndex;
    769  for (size_t i = startIndex; i < srcLength; i++) {
    770    CharT c = srcChars[i];
    771    if constexpr (!std::is_same_v<CharT, Latin1Char>) {
    772      if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
    773        char16_t trail = srcChars[i + 1];
    774        if (unicode::IsTrailSurrogate(trail)) {
    775          trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
    776          destChars[j++] = c;
    777          destChars[j++] = trail;
    778          i++;
    779          continue;
    780        }
    781      }
    782 
    783      // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
    784      // lowercases to <U+0069 U+0307>.
    785      if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
    786        // Return if the output buffer is too small.
    787        if (srcLength == destLength) {
    788          return i;
    789        }
    790 
    791        destChars[j++] = CharT('i');
    792        destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
    793        continue;
    794      }
    795 
    796      // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
    797      // one of two codepoints depending on context.
    798      if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
    799        destChars[j++] = Final_Sigma(srcChars, srcLength, i);
    800        continue;
    801      }
    802    }
    803 
    804    c = unicode::ToLowerCase(c);
    805    destChars[j++] = c;
    806  }
    807 
    808  MOZ_ASSERT(j == destLength);
    809  return srcLength;
    810 }
    811 
    812 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
    813                                size_t length) {
    814  size_t lowerLength = length;
    815  for (size_t i = startIndex; i < length; i++) {
    816    char16_t c = chars[i];
    817 
    818    // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
    819    if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
    820      lowerLength += 1;
    821    }
    822  }
    823  return lowerLength;
    824 }
    825 
    826 template <typename CharT>
    827 static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) {
    828  // Unlike toUpperCase, toLowerCase has the nice invariant that if the
    829  // input is a Latin-1 string, the output is also a Latin-1 string.
    830 
    831  StringChars<CharT> newChars(cx);
    832 
    833  const size_t length = str->length();
    834  size_t resultLength;
    835  {
    836    AutoCheckCannotGC nogc;
    837    const CharT* chars = str->chars<CharT>(nogc);
    838 
    839    // We don't need extra special casing checks in the loop below,
    840    // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
    841    // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
    842    MOZ_ASSERT(unicode::ChangesWhenLowerCased(
    843                   unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
    844               "U+0130 has a simple lower case mapping");
    845    MOZ_ASSERT(
    846        unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
    847        "U+03A3 has a simple lower case mapping");
    848 
    849    // One element Latin-1 strings can be directly retrieved from the
    850    // static strings cache.
    851    if constexpr (std::is_same_v<CharT, Latin1Char>) {
    852      if (length == 1) {
    853        CharT lower = unicode::ToLowerCase(chars[0]);
    854        MOZ_ASSERT(StaticStrings::hasUnit(lower));
    855 
    856        return cx->staticStrings().getUnit(lower);
    857      }
    858    }
    859 
    860    // Look for the first character that changes when lowercased.
    861    size_t i = 0;
    862    for (; i < length; i++) {
    863      CharT c = chars[i];
    864      if constexpr (!std::is_same_v<CharT, Latin1Char>) {
    865        if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
    866          CharT trail = chars[i + 1];
    867          if (unicode::IsTrailSurrogate(trail)) {
    868            if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
    869              break;
    870            }
    871 
    872            i++;
    873            continue;
    874          }
    875        }
    876      }
    877      if (unicode::ChangesWhenLowerCased(c)) {
    878        break;
    879      }
    880    }
    881 
    882    // If no character needs to change, return the input string.
    883    if (i == length) {
    884      return str;
    885    }
    886 
    887    resultLength = length;
    888    if (!newChars.maybeAlloc(cx, resultLength)) {
    889      return nullptr;
    890    }
    891 
    892    PodCopy(newChars.data(nogc), chars, i);
    893 
    894    size_t readChars =
    895        ToLowerCaseImpl(newChars.data(nogc), chars, i, length, resultLength);
    896    if constexpr (!std::is_same_v<CharT, Latin1Char>) {
    897      if (readChars < length) {
    898        resultLength = ToLowerCaseLength(chars, readChars, length);
    899 
    900        if (!newChars.maybeRealloc(cx, length, resultLength)) {
    901          return nullptr;
    902        }
    903 
    904        MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.data(nogc), chars,
    905                                                  readChars, length,
    906                                                  resultLength));
    907      }
    908    } else {
    909      MOZ_ASSERT(readChars == length,
    910                 "Latin-1 strings don't have special lower case mappings");
    911    }
    912  }
    913 
    914  return newChars.template toStringDontDeflate<CanGC>(cx, resultLength);
    915 }
    916 
    917 JSLinearString* js::StringToLowerCase(JSContext* cx, JSString* string) {
    918  JSLinearString* linear = string->ensureLinear(cx);
    919  if (!linear) {
    920    return nullptr;
    921  }
    922 
    923  if (linear->hasLatin1Chars()) {
    924    return ToLowerCase<Latin1Char>(cx, linear);
    925  }
    926  return ToLowerCase<char16_t>(cx, linear);
    927 }
    928 
    929 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
    930  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toLowerCase");
    931  CallArgs args = CallArgsFromVp(argc, vp);
    932 
    933  JSString* str = ToStringForStringFunction(cx, "toLowerCase", args.thisv());
    934  if (!str) {
    935    return false;
    936  }
    937 
    938  JSString* result = StringToLowerCase(cx, str);
    939  if (!result) {
    940    return false;
    941  }
    942 
    943  args.rval().setString(result);
    944  return true;
    945 }
    946 
    947 #if JS_HAS_INTL_API
// Lithuanian, Turkish, and Azeri have language dependent case mappings.
// Each entry is a two-letter language subtag followed by its NUL terminator.
static constexpr char LanguagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
    950 
    951 bool js::LocaleHasDefaultCaseMapping(const char* locale) {
    952  MOZ_ASSERT(locale);
    953 
    954  size_t languageSubtagLength;
    955  if (auto* sep = strchr(locale, '-')) {
    956    languageSubtagLength = sep - locale;
    957  } else {
    958    languageSubtagLength = std::strlen(locale);
    959  }
    960 
    961  // Invalid locale identifiers default to the last-ditch locale "en-GB", which
    962  // has default case mapping.
    963  mozilla::Span<const char> span{locale, languageSubtagLength};
    964  {
    965    // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC.
    966    JS::AutoSuppressGCAnalysis nogc;
    967    if (!mozilla::intl::IsStructurallyValidLanguageTag(span)) {
    968      return true;
    969    }
    970  }
    971 
    972  mozilla::intl::LanguageSubtag subtag{span};
    973 
    974  // Canonical case for the language subtag is lower-case
    975  {
    976    // Tell the analysis the |ToLowerCase| function can't GC.
    977    JS::AutoSuppressGCAnalysis nogc;
    978 
    979    subtag.ToLowerCase();
    980  }
    981 
    982  // Replace outdated language subtags. Skips complex language mappings, which
    983  // is okay because none of the languages with special casing are affected by
    984  // complex language mapping.
    985  {
    986    // Tell the analysis the |LanguageMapping| function can't GC.
    987    JS::AutoSuppressGCAnalysis nogc;
    988 
    989    (void)mozilla::intl::Locale::LanguageMapping(subtag);
    990  }
    991 
    992  // Check for languages which don't use the default case mapping algorithm.
    993  for (const auto& language : LanguagesWithSpecialCasing) {
    994    if (subtag.EqualTo(language)) {
    995      return false;
    996    }
    997  }
    998 
    999  // Simple locale with default case mapping. (Or an invalid locale which
   1000  // defaults to the last-ditch locale "en-GB".)
   1001  return true;
   1002 }
   1003 
   1004 static const char* CaseMappingLocale(JSLinearString* locale) {
   1005  MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
   1006 
   1007  // All strings in |languagesWithSpecialCasing| are of length two, so we
   1008  // only need to compare the first two characters to find a matching locale.
   1009  // ES2017 Intl, §9.2.2 BestAvailableLocale
   1010  if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
   1011    for (const auto& language : LanguagesWithSpecialCasing) {
   1012      if (locale->latin1OrTwoByteChar(0) == language[0] &&
   1013          locale->latin1OrTwoByteChar(1) == language[1]) {
   1014        return language;
   1015      }
   1016    }
   1017  }
   1018 
   1019  return nullptr;
   1020 }
   1021 
// Selects whether TransformCase converts its input to lower or to upper case.
enum class TargetCase { Lower, Upper };
   1023 
   1024 /**
   1025 * TransformCase ( S, locales, targetCase )
   1026 */
   1027 static JSLinearString* TransformCase(JSContext* cx, Handle<JSString*> string,
   1028                                     Handle<Value> locales,
   1029                                     TargetCase targetCase) {
   1030  // Step 1.
   1031  Rooted<intl::LocalesList> requestedLocales(cx, cx);
   1032  if (!intl::CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
   1033    return nullptr;
   1034  }
   1035 
   1036  // Trivial case: When the input is empty, directly return the empty string.
   1037  if (string->empty()) {
   1038    return cx->emptyString();
   1039  }
   1040 
   1041  // Steps 2-3.
   1042  Rooted<JSLinearString*> requestedLocale(cx);
   1043  if (!requestedLocales.empty()) {
   1044    requestedLocale = requestedLocales[0];
   1045  } else {
   1046    requestedLocale = cx->global()->globalIntlData().defaultLocale(cx);
   1047    if (!requestedLocale) {
   1048      return nullptr;
   1049    }
   1050  }
   1051 
   1052  // Steps 4-10.
   1053  const char* locale = CaseMappingLocale(requestedLocale);
   1054  if (!locale) {
   1055    // Call the default case conversion methods for language independent casing.
   1056    return targetCase == TargetCase::Lower ? StringToLowerCase(cx, string)
   1057                                           : StringToUpperCase(cx, string);
   1058  }
   1059 
   1060  AutoStableStringChars inputChars(cx);
   1061  if (!inputChars.initTwoByte(cx, string)) {
   1062    return nullptr;
   1063  }
   1064  mozilla::Range<const char16_t> input = inputChars.twoByteRange();
   1065 
   1066  // Note: maximum case mapping length is three characters, so the result
   1067  // length might be > INT32_MAX. ICU will fail in this case.
   1068  static_assert(JSString::MAX_LENGTH <= INT32_MAX,
   1069                "String length must fit in int32_t for ICU");
   1070 
   1071  static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
   1072 
   1073  intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
   1074 
   1075  auto ok =
   1076      targetCase == TargetCase::Lower
   1077          ? mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer)
   1078          : mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer);
   1079  if (ok.isErr()) {
   1080    intl::ReportInternalError(cx, ok.unwrapErr());
   1081    return nullptr;
   1082  }
   1083 
   1084  return buffer.toString(cx);
   1085 }
   1086 #endif
   1087 
   1088 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
   1089  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
   1090                                        "toLocaleLowerCase");
   1091  CallArgs args = CallArgsFromVp(argc, vp);
   1092 
   1093  // Steps 1-2.
   1094  Rooted<JSString*> str(
   1095      cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
   1096  if (!str) {
   1097    return false;
   1098  }
   1099 
   1100 #if JS_HAS_INTL_API
   1101  // Step 3.
   1102  auto* result = TransformCase(cx, str, args.get(0), TargetCase::Lower);
   1103  if (!result) {
   1104    return false;
   1105  }
   1106 
   1107  args.rval().setString(result);
   1108  return true;
   1109 #else
   1110  /*
   1111   * Forcefully ignore the first (or any) argument and return toLowerCase(),
   1112   * ECMA has reserved that argument, presumably for defining the locale.
   1113   */
   1114  if (cx->runtime()->localeCallbacks &&
   1115      cx->runtime()->localeCallbacks->localeToLowerCase) {
   1116    Rooted<Value> result(cx);
   1117    if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
   1118      return false;
   1119    }
   1120 
   1121    args.rval().set(result);
   1122    return true;
   1123  }
   1124 
   1125  Rooted<JSLinearString*> linear(cx, str->ensureLinear(cx));
   1126  if (!linear) {
   1127    return false;
   1128  }
   1129 
   1130  JSString* result = StringToLowerCase(cx, linear);
   1131  if (!result) {
   1132    return false;
   1133  }
   1134 
   1135  args.rval().setString(result);
   1136  return true;
   1137 #endif
   1138 }
   1139 
   1140 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
   1141  // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
   1142  // special casing rules, so detect it inline.
   1143  bool hasUpperCaseSpecialCasing =
   1144      charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
   1145  MOZ_ASSERT(hasUpperCaseSpecialCasing ==
   1146             unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
   1147 
   1148  return hasUpperCaseSpecialCasing;
   1149 }
   1150 
   1151 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
   1152  return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
   1153 }
   1154 
   1155 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
   1156  // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
   1157  MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
   1158 
   1159  return 2;
   1160 }
   1161 
   1162 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
   1163  MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
   1164 
   1165  return unicode::LengthUpperCaseSpecialCasing(charCode);
   1166 }
   1167 
   1168 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
   1169                                                           Latin1Char* elements,
   1170                                                           size_t* index) {
   1171  // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
   1172  MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
   1173  static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
   1174 
   1175  elements[(*index)++] = 'S';
   1176  elements[(*index)++] = 'S';
   1177 }
   1178 
// Two-byte destination overload: forwards to
// unicode::AppendUpperCaseSpecialCasing, passing |elements| and |index|
// through unchanged.
// NOTE(review): presumably the callee writes the expansion at |*index| and
// advances it, mirroring the Latin-1 overload -- confirm against the unicode
// helper.
static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
                                                           char16_t* elements,
                                                           size_t* index) {
  unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
}
   1184 
   1185 // See ToLowerCaseImpl for an explanation of the parameters.
   1186 template <typename DestChar, typename SrcChar>
   1187 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
   1188                              size_t startIndex, size_t srcLength,
   1189                              size_t destLength) {
   1190  static_assert(std::is_same_v<SrcChar, Latin1Char> ||
   1191                    !std::is_same_v<DestChar, Latin1Char>,
   1192                "cannot write non-Latin-1 characters into Latin-1 string");
   1193  MOZ_ASSERT(startIndex < srcLength);
   1194  MOZ_ASSERT(srcLength <= destLength);
   1195 
   1196  size_t j = startIndex;
   1197  for (size_t i = startIndex; i < srcLength; i++) {
   1198    char16_t c = srcChars[i];
   1199    if constexpr (!std::is_same_v<DestChar, Latin1Char>) {
   1200      if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
   1201        char16_t trail = srcChars[i + 1];
   1202        if (unicode::IsTrailSurrogate(trail)) {
   1203          trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
   1204          destChars[j++] = c;
   1205          destChars[j++] = trail;
   1206          i++;
   1207          continue;
   1208        }
   1209      }
   1210    }
   1211 
   1212    if (MOZ_UNLIKELY(c > 0x7f &&
   1213                     ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
   1214      // Return if the output buffer is too small.
   1215      if (srcLength == destLength) {
   1216        return i;
   1217      }
   1218 
   1219      ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
   1220      continue;
   1221    }
   1222 
   1223    c = unicode::ToUpperCase(c);
   1224    if constexpr (std::is_same_v<DestChar, Latin1Char>) {
   1225      MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR);
   1226    }
   1227    destChars[j++] = c;
   1228  }
   1229 
   1230  MOZ_ASSERT(j == destLength);
   1231  return srcLength;
   1232 }
   1233 
   1234 template <typename CharT>
   1235 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
   1236                                size_t length) {
   1237  size_t upperLength = length;
   1238  for (size_t i = startIndex; i < length; i++) {
   1239    char16_t c = chars[i];
   1240 
   1241    if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) {
   1242      upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
   1243    }
   1244  }
   1245  return upperLength;
   1246 }
   1247 
   1248 template <typename DestChar, typename SrcChar>
   1249 static inline bool ToUpperCase(JSContext* cx, StringChars<DestChar>& newChars,
   1250                               const SrcChar* chars, size_t startIndex,
   1251                               size_t length, size_t* resultLength) {
   1252  MOZ_ASSERT(startIndex < length);
   1253 
   1254  AutoCheckCannotGC nogc;
   1255 
   1256  *resultLength = length;
   1257  if (!newChars.maybeAlloc(cx, length)) {
   1258    return false;
   1259  }
   1260 
   1261  CopyChars(newChars.data(nogc), chars, startIndex);
   1262 
   1263  size_t readChars =
   1264      ToUpperCaseImpl(newChars.data(nogc), chars, startIndex, length, length);
   1265  if (readChars < length) {
   1266    size_t actualLength = ToUpperCaseLength(chars, readChars, length);
   1267 
   1268    *resultLength = actualLength;
   1269    if (!newChars.maybeRealloc(cx, length, actualLength)) {
   1270      return false;
   1271    }
   1272 
   1273    MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.data(nogc), chars,
   1274                                              readChars, length, actualLength));
   1275  }
   1276 
   1277  return true;
   1278 }
   1279 
   1280 template <typename CharT>
   1281 static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) {
   1282  using Latin1StringChars = StringChars<Latin1Char>;
   1283  using TwoByteStringChars = StringChars<char16_t>;
   1284 
   1285  mozilla::MaybeOneOf<Latin1StringChars, TwoByteStringChars> newChars;
   1286  const size_t length = str->length();
   1287  size_t resultLength;
   1288  {
   1289    AutoCheckCannotGC nogc;
   1290    const CharT* chars = str->chars<CharT>(nogc);
   1291 
   1292    // Most one element Latin-1 strings can be directly retrieved from the
   1293    // static strings cache.
   1294    if constexpr (std::is_same_v<CharT, Latin1Char>) {
   1295      if (length == 1) {
   1296        Latin1Char c = chars[0];
   1297        if (c != unicode::MICRO_SIGN &&
   1298            c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
   1299            c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
   1300          char16_t upper = unicode::ToUpperCase(c);
   1301          MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
   1302          MOZ_ASSERT(StaticStrings::hasUnit(upper));
   1303 
   1304          return cx->staticStrings().getUnit(upper);
   1305        }
   1306 
   1307        MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
   1308                   ToUpperCaseHasSpecialCasing(c));
   1309      }
   1310    }
   1311 
   1312    // Look for the first character that changes when uppercased.
   1313    size_t i = 0;
   1314    for (; i < length; i++) {
   1315      CharT c = chars[i];
   1316      if constexpr (!std::is_same_v<CharT, Latin1Char>) {
   1317        if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
   1318          CharT trail = chars[i + 1];
   1319          if (unicode::IsTrailSurrogate(trail)) {
   1320            if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
   1321              break;
   1322            }
   1323 
   1324            i++;
   1325            continue;
   1326          }
   1327        }
   1328      }
   1329      if (unicode::ChangesWhenUpperCased(c)) {
   1330        break;
   1331      }
   1332      if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
   1333        break;
   1334      }
   1335    }
   1336 
   1337    // If no character needs to change, return the input string.
   1338    if (i == length) {
   1339      return str;
   1340    }
   1341 
   1342    // The string changes when uppercased, so we must create a new string.
   1343    // Can it be Latin-1?
   1344    //
   1345    // If the original string is Latin-1, it can -- unless the string
   1346    // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
   1347    // the only Latin-1 codepoints that don't uppercase within Latin-1.
   1348    // Search for those codepoints to decide whether the new string can be
   1349    // Latin-1.
   1350    // If the original string is a two-byte string, its uppercase form is
   1351    // so rarely Latin-1 that we don't even consider creating a new
   1352    // Latin-1 string.
   1353    if constexpr (std::is_same_v<CharT, Latin1Char>) {
   1354      bool resultIsLatin1 = std::none_of(chars + i, chars + length, [](auto c) {
   1355        bool upperCaseIsTwoByte =
   1356            c == unicode::MICRO_SIGN ||
   1357            c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
   1358        MOZ_ASSERT(upperCaseIsTwoByte ==
   1359                   (unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR));
   1360        return upperCaseIsTwoByte;
   1361      });
   1362 
   1363      if (resultIsLatin1) {
   1364        newChars.construct<Latin1StringChars>(cx);
   1365 
   1366        if (!ToUpperCase(cx, newChars.ref<Latin1StringChars>(), chars, i,
   1367                         length, &resultLength)) {
   1368          return nullptr;
   1369        }
   1370      } else {
   1371        newChars.construct<TwoByteStringChars>(cx);
   1372 
   1373        if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i,
   1374                         length, &resultLength)) {
   1375          return nullptr;
   1376        }
   1377      }
   1378    } else {
   1379      newChars.construct<TwoByteStringChars>(cx);
   1380 
   1381      if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i, length,
   1382                       &resultLength)) {
   1383        return nullptr;
   1384      }
   1385    }
   1386  }
   1387 
   1388  auto toString = [&](auto& chars) {
   1389    return chars.template toStringDontDeflate<CanGC>(cx, resultLength);
   1390  };
   1391 
   1392  return newChars.mapNonEmpty(toString);
   1393 }
   1394 
   1395 JSLinearString* js::StringToUpperCase(JSContext* cx, JSString* string) {
   1396  JSLinearString* linear = string->ensureLinear(cx);
   1397  if (!linear) {
   1398    return nullptr;
   1399  }
   1400 
   1401  if (linear->hasLatin1Chars()) {
   1402    return ToUpperCase<Latin1Char>(cx, linear);
   1403  }
   1404  return ToUpperCase<char16_t>(cx, linear);
   1405 }
   1406 
   1407 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
   1408  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toUpperCase");
   1409  CallArgs args = CallArgsFromVp(argc, vp);
   1410 
   1411  JSString* str = ToStringForStringFunction(cx, "toUpperCase", args.thisv());
   1412  if (!str) {
   1413    return false;
   1414  }
   1415 
   1416  JSString* result = StringToUpperCase(cx, str);
   1417  if (!result) {
   1418    return false;
   1419  }
   1420 
   1421  args.rval().setString(result);
   1422  return true;
   1423 }
   1424 
   1425 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
   1426  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
   1427                                        "toLocaleUpperCase");
   1428  CallArgs args = CallArgsFromVp(argc, vp);
   1429 
   1430  Rooted<JSString*> str(
   1431      cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
   1432  if (!str) {
   1433    return false;
   1434  }
   1435 
   1436 #if JS_HAS_INTL_API
   1437  // Step 3.
   1438  auto* result = TransformCase(cx, str, args.get(0), TargetCase::Upper);
   1439  if (!result) {
   1440    return false;
   1441  }
   1442 
   1443  args.rval().setString(result);
   1444  return true;
   1445 #else
   1446  /*
   1447   * Forcefully ignore the first (or any) argument and return toUpperCase(),
   1448   * ECMA has reserved that argument, presumably for defining the locale.
   1449   */
   1450  if (cx->runtime()->localeCallbacks &&
   1451      cx->runtime()->localeCallbacks->localeToUpperCase) {
   1452    Rooted<Value> result(cx);
   1453    if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
   1454      return false;
   1455    }
   1456 
   1457    args.rval().set(result);
   1458    return true;
   1459  }
   1460 
   1461  Rooted<JSLinearString*> linear(cx, str->ensureLinear(cx));
   1462  if (!linear) {
   1463    return false;
   1464  }
   1465 
   1466  JSString* result = StringToUpperCase(cx, linear);
   1467  if (!result) {
   1468    return false;
   1469  }
   1470 
   1471  args.rval().setString(result);
   1472  return true;
   1473 #endif
   1474 }
   1475 
   1476 /**
   1477 * String.prototype.localeCompare ( that [ , reserved1 [ , reserved2 ] ] )
   1478 *
   1479 * ES2025 draft rev 76814cbd5d7842c2a99d28e6e8c7833f1de5bee0
   1480 *
   1481 * String.prototype.localeCompare ( that [ , locales [ , options ] ] )
   1482 *
   1483 * ES2025 Intl draft rev 6827e6e40b45fb313472595be31352451a2d85fa
   1484 */
   1485 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
   1486  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
   1487                                        "localeCompare");
   1488  CallArgs args = CallArgsFromVp(argc, vp);
   1489 
   1490  // Steps 1-2.
   1491  RootedString str(
   1492      cx, ToStringForStringFunction(cx, "localeCompare", args.thisv()));
   1493  if (!str) {
   1494    return false;
   1495  }
   1496 
   1497  // Step 3.
   1498  RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
   1499  if (!thatStr) {
   1500    return false;
   1501  }
   1502 
   1503 #if JS_HAS_INTL_API
   1504  HandleValue locales = args.get(1);
   1505  HandleValue options = args.get(2);
   1506 
   1507  // Step 4.
   1508  Rooted<CollatorObject*> collator(
   1509      cx, intl::GetOrCreateCollator(cx, locales, options));
   1510  if (!collator) {
   1511    return false;
   1512  }
   1513 
   1514  // Step 5.
   1515  return intl::CompareStrings(cx, collator, str, thatStr, args.rval());
   1516 #else
   1517  // Delegate to JSLocaleCallbacks when Intl functionality is not exposed.
   1518  if (cx->runtime()->localeCallbacks &&
   1519      cx->runtime()->localeCallbacks->localeCompare) {
   1520    RootedValue result(cx);
   1521    if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
   1522                                                       &result)) {
   1523      return false;
   1524    }
   1525 
   1526    args.rval().set(result);
   1527    return true;
   1528  }
   1529 
   1530  int32_t result;
   1531  if (!CompareStrings(cx, str, thatStr, &result)) {
   1532    return false;
   1533  }
   1534 
   1535  args.rval().setInt32(result);
   1536  return true;
   1537 #endif  // JS_HAS_INTL_API
   1538 }
   1539 
   1540 #if JS_HAS_INTL_API
   1541 
   1542 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
   1543 // 21.1.3.12 String.prototype.normalize ( [ form ] )
   1544 //
   1545 // String.prototype.normalize is only implementable if ICU's normalization
   1546 // functionality is available.
   1547 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
   1548  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "normalize");
   1549  CallArgs args = CallArgsFromVp(argc, vp);
   1550 
   1551  // Steps 1-2.
   1552  RootedString str(cx,
   1553                   ToStringForStringFunction(cx, "normalize", args.thisv()));
   1554  if (!str) {
   1555    return false;
   1556  }
   1557 
   1558  using NormalizationForm = mozilla::intl::String::NormalizationForm;
   1559 
   1560  NormalizationForm form;
   1561  if (!args.hasDefined(0)) {
   1562    // Step 3.
   1563    form = NormalizationForm::NFC;
   1564  } else {
   1565    // Step 4.
   1566    JSLinearString* formStr = ArgToLinearString(cx, args, 0);
   1567    if (!formStr) {
   1568      return false;
   1569    }
   1570 
   1571    // Step 5.
   1572    if (EqualStrings(formStr, cx->names().NFC)) {
   1573      form = NormalizationForm::NFC;
   1574    } else if (EqualStrings(formStr, cx->names().NFD)) {
   1575      form = NormalizationForm::NFD;
   1576    } else if (EqualStrings(formStr, cx->names().NFKC)) {
   1577      form = NormalizationForm::NFKC;
   1578    } else if (EqualStrings(formStr, cx->names().NFKD)) {
   1579      form = NormalizationForm::NFKD;
   1580    } else {
   1581      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   1582                                JSMSG_INVALID_NORMALIZE_FORM);
   1583      return false;
   1584    }
   1585  }
   1586 
   1587  // Latin-1 strings are already in Normalization Form C.
   1588  if (form == NormalizationForm::NFC && str->hasLatin1Chars()) {
   1589    // Step 7.
   1590    args.rval().setString(str);
   1591    return true;
   1592  }
   1593 
   1594  // Step 6.
   1595  AutoStableStringChars stableChars(cx);
   1596  if (!stableChars.initTwoByte(cx, str)) {
   1597    return false;
   1598  }
   1599 
   1600  mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
   1601 
   1602  static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
   1603 
   1604  intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
   1605 
   1606  auto alreadyNormalized =
   1607      mozilla::intl::String::Normalize(form, srcChars, buffer);
   1608  if (alreadyNormalized.isErr()) {
   1609    intl::ReportInternalError(cx, alreadyNormalized.unwrapErr());
   1610    return false;
   1611  }
   1612 
   1613  using AlreadyNormalized = mozilla::intl::String::AlreadyNormalized;
   1614 
   1615  // Return if the input string is already normalized.
   1616  if (alreadyNormalized.unwrap() == AlreadyNormalized::Yes) {
   1617    // Step 7.
   1618    args.rval().setString(str);
   1619    return true;
   1620  }
   1621 
   1622  JSString* ns = buffer.toString(cx);
   1623  if (!ns) {
   1624    return false;
   1625  }
   1626 
   1627  // Step 7.
   1628  args.rval().setString(ns);
   1629  return true;
   1630 }
   1631 
   1632 #endif  // JS_HAS_INTL_API
   1633 
   1634 /**
   1635 * IsStringWellFormedUnicode ( string )
   1636 * https://tc39.es/ecma262/#sec-isstringwellformedunicode
   1637 */
   1638 static bool IsStringWellFormedUnicode(JSContext* cx, JSString* str,
   1639                                      size_t* isWellFormedUpTo) {
   1640  MOZ_ASSERT(isWellFormedUpTo);
   1641  *isWellFormedUpTo = 0;
   1642 
   1643  AutoCheckCannotGC nogc;
   1644 
   1645  size_t len = str->length();
   1646 
   1647  // Latin1 chars are well-formed.
   1648  if (str->hasLatin1Chars()) {
   1649    *isWellFormedUpTo = len;
   1650    return true;
   1651  }
   1652 
   1653  JSLinearString* linear = str->ensureLinear(cx);
   1654  if (!linear) {
   1655    return false;
   1656  }
   1657 
   1658  *isWellFormedUpTo = Utf16ValidUpTo(Span{linear->twoByteChars(nogc), len});
   1659  return true;
   1660 }
   1661 
   1662 /**
   1663 * Well-Formed Unicode Strings (Stage 3 proposal)
   1664 *
   1665 * String.prototype.isWellFormed
   1666 * https://tc39.es/proposal-is-usv-string/#sec-string.prototype.iswellformed
   1667 */
   1668 static bool str_isWellFormed(JSContext* cx, unsigned argc, Value* vp) {
   1669  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "isWellFormed");
   1670  CallArgs args = CallArgsFromVp(argc, vp);
   1671 
   1672  // Step 1. Let O be ? RequireObjectCoercible(this value).
   1673  // Step 2. Let S be ? ToString(O).
   1674  JSString* str = ToStringForStringFunction(cx, "isWellFormed", args.thisv());
   1675  if (!str) {
   1676    return false;
   1677  }
   1678 
   1679  // Step 3. Return IsStringWellFormedUnicode(S).
   1680  size_t isWellFormedUpTo;
   1681  if (!IsStringWellFormedUnicode(cx, str, &isWellFormedUpTo)) {
   1682    return false;
   1683  }
   1684  MOZ_ASSERT(isWellFormedUpTo <= str->length());
   1685 
   1686  args.rval().setBoolean(isWellFormedUpTo == str->length());
   1687  return true;
   1688 }
   1689 
   1690 /**
   1691 * Well-Formed Unicode Strings (Stage 3 proposal)
   1692 *
   1693 * String.prototype.toWellFormed
   1694 * https://tc39.es/proposal-is-usv-string/#sec-string.prototype.towellformed
   1695 */
   1696 static bool str_toWellFormed(JSContext* cx, unsigned argc, Value* vp) {
   1697  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toWellFormed");
   1698  CallArgs args = CallArgsFromVp(argc, vp);
   1699 
   1700  // Step 1. Let O be ? RequireObjectCoercible(this value).
   1701  // Step 2. Let S be ? ToString(O).
   1702  RootedString str(cx,
   1703                   ToStringForStringFunction(cx, "toWellFormed", args.thisv()));
   1704  if (!str) {
   1705    return false;
   1706  }
   1707 
   1708  // Step 3. Let strLen be the length of S.
   1709  size_t len = str->length();
   1710 
   1711  // If the string itself is well-formed, return it.
   1712  size_t isWellFormedUpTo;
   1713  if (!IsStringWellFormedUnicode(cx, str, &isWellFormedUpTo)) {
   1714    return false;
   1715  }
   1716  if (isWellFormedUpTo == len) {
   1717    args.rval().setString(str);
   1718    return true;
   1719  }
   1720  MOZ_ASSERT(isWellFormedUpTo < len);
   1721 
   1722  // Step 4-6
   1723  StringChars<char16_t> newChars(cx);
   1724  if (!newChars.maybeAlloc(cx, len)) {
   1725    return false;
   1726  }
   1727 
   1728  {
   1729    AutoCheckCannotGC nogc;
   1730 
   1731    JSLinearString* linear = str->ensureLinear(cx);
   1732    MOZ_ASSERT(linear, "IsStringWellFormedUnicode linearized the string");
   1733 
   1734    PodCopy(newChars.data(nogc), linear->twoByteChars(nogc), len);
   1735 
   1736    auto span = mozilla::Span{newChars.data(nogc), len};
   1737 
   1738    // Replace the character.
   1739    span[isWellFormedUpTo] = unicode::REPLACEMENT_CHARACTER;
   1740 
   1741    // Check any remaining characters.
   1742    auto remaining = span.From(isWellFormedUpTo + 1);
   1743    if (!remaining.IsEmpty()) {
   1744      EnsureUtf16ValiditySpan(remaining);
   1745    }
   1746  }
   1747 
   1748  JSString* result = newChars.toStringDontDeflateNonStatic<CanGC>(cx, len);
   1749  if (!result) {
   1750    return false;
   1751  }
   1752 
   1753  // Step 7. Return result.
   1754  args.rval().setString(result);
   1755  return true;
   1756 }
   1757 
   1758 // Clamp |value| to a string index between 0 and |length|.
   1759 static MOZ_ALWAYS_INLINE bool ToClampedStringIndex(JSContext* cx,
   1760                                                   Handle<Value> value,
   1761                                                   uint32_t length,
   1762                                                   uint32_t* result) {
   1763  // Handle the common case of int32 indices first.
   1764  if (value.isInt32()) {
   1765    int32_t i = value.toInt32();
   1766    *result = std::min(uint32_t(std::max(i, 0)), length);
   1767    return true;
   1768  }
   1769 
   1770  double d;
   1771  if (!ToInteger(cx, value, &d)) {
   1772    return false;
   1773  }
   1774  *result = uint32_t(std::clamp(d, 0.0, double(length)));
   1775  return true;
   1776 }
   1777 
   1778 // Return |Some(index)| if |value| is a string index between 0 and |length|.
   1779 // Otherwise return |Nothing|.
   1780 static MOZ_ALWAYS_INLINE bool ToStringIndex(JSContext* cx, Handle<Value> value,
   1781                                            size_t length,
   1782                                            mozilla::Maybe<size_t>* result) {
   1783  // Handle the common case of int32 indices first.
   1784  if (MOZ_LIKELY(value.isInt32())) {
   1785    size_t index = size_t(value.toInt32());
   1786    if (index < length) {
   1787      *result = mozilla::Some(index);
   1788    }
   1789    return true;
   1790  }
   1791 
   1792  double index = 0.0;
   1793  if (!ToInteger(cx, value, &index)) {
   1794    return false;
   1795  }
   1796  if (0 <= index && index < length) {
   1797    *result = mozilla::Some(size_t(index));
   1798  }
   1799  return true;
   1800 }
   1801 
   1802 // Return |Some(index)| if |value| is a relative string index between 0 and
   1803 // |length|. Otherwise return |Nothing|.
   1804 static MOZ_ALWAYS_INLINE bool ToRelativeStringIndex(
   1805    JSContext* cx, Handle<Value> value, size_t length,
   1806    mozilla::Maybe<size_t>* result) {
   1807  // Handle the common case of int32 indices first.
   1808  if (MOZ_LIKELY(value.isInt32())) {
   1809    int32_t index = value.toInt32();
   1810    if (index < 0) {
   1811      index += int32_t(length);
   1812    }
   1813    if (size_t(index) < length) {
   1814      *result = mozilla::Some(size_t(index));
   1815    }
   1816    return true;
   1817  }
   1818 
   1819  double index = 0.0;
   1820  if (!ToInteger(cx, value, &index)) {
   1821    return false;
   1822  }
   1823  if (index < 0) {
   1824    index += length;
   1825  }
   1826  if (0 <= index && index < length) {
   1827    *result = mozilla::Some(size_t(index));
   1828  }
   1829  return true;
   1830 }
   1831 
   1832 /**
   1833 * 22.1.3.2 String.prototype.charAt ( pos )
   1834 *
   1835 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   1836 */
   1837 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
   1838  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "charAt");
   1839  CallArgs args = CallArgsFromVp(argc, vp);
   1840 
   1841  // Steps 1-2.
   1842  RootedString str(cx, ToStringForStringFunction(cx, "charAt", args.thisv()));
   1843  if (!str) {
   1844    return false;
   1845  }
   1846 
   1847  // Step 3.
   1848  mozilla::Maybe<size_t> index{};
   1849  if (!ToStringIndex(cx, args.get(0), str->length(), &index)) {
   1850    return false;
   1851  }
   1852 
   1853  // Steps 4-5.
   1854  if (index.isNothing()) {
   1855    args.rval().setString(cx->runtime()->emptyString);
   1856    return true;
   1857  }
   1858  MOZ_ASSERT(*index < str->length());
   1859 
   1860  // Step 6.
   1861  auto* result = cx->staticStrings().getUnitStringForElement(cx, str, *index);
   1862  if (!result) {
   1863    return false;
   1864  }
   1865  args.rval().setString(result);
   1866  return true;
   1867 }
   1868 
   1869 /**
   1870 * 22.1.3.3 String.prototype.charCodeAt ( pos )
   1871 *
   1872 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   1873 */
   1874 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
   1875  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "charCodeAt");
   1876  CallArgs args = CallArgsFromVp(argc, vp);
   1877 
   1878  // Steps 1-2.
   1879  RootedString str(cx,
   1880                   ToStringForStringFunction(cx, "charCodeAt", args.thisv()));
   1881  if (!str) {
   1882    return false;
   1883  }
   1884 
   1885  // Step 3.
   1886  mozilla::Maybe<size_t> index{};
   1887  if (!ToStringIndex(cx, args.get(0), str->length(), &index)) {
   1888    return false;
   1889  }
   1890 
   1891  // Steps 4-5.
   1892  if (index.isNothing()) {
   1893    args.rval().setNaN();
   1894    return true;
   1895  }
   1896  MOZ_ASSERT(*index < str->length());
   1897 
   1898  // Step 6.
   1899  char16_t c;
   1900  if (!str->getChar(cx, *index, &c)) {
   1901    return false;
   1902  }
   1903  args.rval().setInt32(c);
   1904  return true;
   1905 }
   1906 
   1907 /**
   1908 * 22.1.3.4 String.prototype.codePointAt ( pos )
   1909 *
   1910 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   1911 */
   1912 bool js::str_codePointAt(JSContext* cx, unsigned argc, Value* vp) {
   1913  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "codePointAt");
   1914  CallArgs args = CallArgsFromVp(argc, vp);
   1915 
   1916  // Steps 1-2.
   1917  RootedString str(cx,
   1918                   ToStringForStringFunction(cx, "codePointAt", args.thisv()));
   1919  if (!str) {
   1920    return false;
   1921  }
   1922 
   1923  // Step 3.
   1924  mozilla::Maybe<size_t> index{};
   1925  if (!ToStringIndex(cx, args.get(0), str->length(), &index)) {
   1926    return false;
   1927  }
   1928 
   1929  // Steps 4-5.
   1930  if (index.isNothing()) {
   1931    args.rval().setUndefined();
   1932    return true;
   1933  }
   1934  MOZ_ASSERT(*index < str->length());
   1935 
   1936  // Step 6.
   1937  char32_t codePoint;
   1938  if (!str->getCodePoint(cx, *index, &codePoint)) {
   1939    return false;
   1940  }
   1941 
   1942  // Step 7.
   1943  args.rval().setInt32(codePoint);
   1944  return true;
   1945 }
   1946 
   1947 /**
   1948 * 22.1.3.1 String.prototype.at ( index )
   1949 *
   1950 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   1951 */
   1952 static bool str_at(JSContext* cx, unsigned argc, Value* vp) {
   1953  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "at");
   1954  CallArgs args = CallArgsFromVp(argc, vp);
   1955 
   1956  // Steps 1-2.
   1957  RootedString str(cx, ToStringForStringFunction(cx, "at", args.thisv()));
   1958  if (!str) {
   1959    return false;
   1960  }
   1961 
   1962  // Steps 3-6.
   1963  mozilla::Maybe<size_t> index{};
   1964  if (!ToRelativeStringIndex(cx, args.get(0), str->length(), &index)) {
   1965    return false;
   1966  }
   1967 
   1968  // Step 7.
   1969  if (index.isNothing()) {
   1970    args.rval().setUndefined();
   1971    return true;
   1972  }
   1973  MOZ_ASSERT(*index < str->length());
   1974 
   1975  // Step 8.
   1976  auto* result = cx->staticStrings().getUnitStringForElement(cx, str, *index);
   1977  if (!result) {
   1978    return false;
   1979  }
   1980  args.rval().setString(result);
   1981  return true;
   1982 }
   1983 
   1984 /*
   1985 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
   1986 * The patlen argument must be positive and no greater than sBMHPatLenMax.
   1987 *
   1988 * Return the index of pat in text, or -1 if not found.
   1989 */
   1990 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
   1991 static const uint32_t sBMHPatLenMax = 255;   /* skip table element is uint8_t */
   1992 static const int sBMHBadPattern =
   1993    -2; /* return value if pat is not ISO-Latin-1 */
   1994 
   1995 template <typename TextChar, typename PatChar>
   1996 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
   1997                              const PatChar* pat, uint32_t patLen) {
   1998  MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
   1999 
   2000  uint8_t skip[sBMHCharSetSize];
   2001  for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
   2002    skip[i] = uint8_t(patLen);
   2003  }
   2004 
   2005  uint32_t patLast = patLen - 1;
   2006  for (uint32_t i = 0; i < patLast; i++) {
   2007    char16_t c = pat[i];
   2008    if (c >= sBMHCharSetSize) {
   2009      return sBMHBadPattern;
   2010    }
   2011    skip[c] = uint8_t(patLast - i);
   2012  }
   2013 
   2014  for (uint32_t k = patLast; k < textLen;) {
   2015    for (uint32_t i = k, j = patLast;; i--, j--) {
   2016      if (text[i] != pat[j]) {
   2017        break;
   2018      }
   2019      if (j == 0) {
   2020        return static_cast<int>(i); /* safe: max string size */
   2021      }
   2022    }
   2023 
   2024    char16_t c = text[k];
   2025    k += (c >= sBMHCharSetSize) ? patLen : skip[c];
   2026  }
   2027  return -1;
   2028 }
   2029 
   2030 template <typename TextChar, typename PatChar>
   2031 struct MemCmp {
   2032  using Extent = uint32_t;
   2033  static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
   2034                                                uint32_t patLen) {
   2035    return (patLen - 2) * sizeof(PatChar);
   2036  }
   2037  static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
   2038                                      Extent extent) {
   2039    MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
   2040    return memcmp(p, t, extent) == 0;
   2041  }
   2042 };
   2043 
   2044 template <typename TextChar, typename PatChar>
   2045 struct ManualCmp {
   2046  using Extent = const PatChar*;
   2047  static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
   2048                                                uint32_t patLen) {
   2049    return pat + patLen;
   2050  }
   2051  static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
   2052                                      Extent extent) {
   2053    for (; p != extent; ++p, ++t) {
   2054      if (*p != *t) {
   2055        return false;
   2056      }
   2057    }
   2058    return true;
   2059  }
   2060 };
   2061 
   2062 template <class InnerMatch, typename TextChar, typename PatChar>
   2063 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
   2064                   uint32_t patlen) {
   2065  MOZ_ASSERT(patlen > 1);
   2066 
   2067  const typename InnerMatch::Extent extent =
   2068      InnerMatch::computeExtent(pat, patlen);
   2069 
   2070  uint32_t i = 0;
   2071  uint32_t n = textlen - patlen + 1;
   2072 
   2073  while (i < n) {
   2074    const TextChar* pos;
   2075 
   2076    // This is a bit awkward. Consider the case where we're searching "abcdef"
   2077    // for "def". n will be 4, because we know in advance that the last place we
   2078    // can *start* a successful search will be at 'd'. However, if we just use n
   2079    // - i, then our first search will be looking through "abcd" for "de",
   2080    // because our memchr2xN functions search for two characters at a time. So
   2081    // we just have to compensate by adding 1. This will never exceed textlen
   2082    // because we know patlen is at least two.
   2083    size_t searchLen = n - i + 1;
   2084    if (sizeof(TextChar) == 1) {
   2085      MOZ_ASSERT(pat[0] <= 0xff);
   2086      pos = (TextChar*)SIMD::memchr2x8((char*)text + i, pat[0], pat[1],
   2087                                       searchLen);
   2088    } else {
   2089      pos = (TextChar*)SIMD::memchr2x16((char16_t*)(text + i), char16_t(pat[0]),
   2090                                        char16_t(pat[1]), searchLen);
   2091    }
   2092 
   2093    if (pos == nullptr) {
   2094      return -1;
   2095    }
   2096 
   2097    i = static_cast<uint32_t>(pos - text);
   2098    const uint32_t inlineLookaheadChars = 2;
   2099    if (InnerMatch::match(pat + inlineLookaheadChars,
   2100                          text + i + inlineLookaheadChars, extent)) {
   2101      return i;
   2102    }
   2103 
   2104    i += 1;
   2105  }
   2106  return -1;
   2107 }
   2108 
   2109 template <typename TextChar, typename PatChar>
   2110 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
   2111                                         const PatChar* pat, uint32_t patLen) {
   2112  if (patLen == 0) {
   2113    return 0;
   2114  }
   2115  if (textLen < patLen) {
   2116    return -1;
   2117  }
   2118 
   2119  if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
   2120    return -1;
   2121  }
   2122 
   2123  if (patLen == 1) {
   2124    const TextChar* pos;
   2125    if (sizeof(TextChar) == 1) {
   2126      MOZ_ASSERT(pat[0] <= 0xff);
   2127      pos = (TextChar*)SIMD::memchr8((char*)text, pat[0], textLen);
   2128    } else {
   2129      pos =
   2130          (TextChar*)SIMD::memchr16((char16_t*)text, char16_t(pat[0]), textLen);
   2131    }
   2132 
   2133    if (pos == nullptr) {
   2134      return -1;
   2135    }
   2136 
   2137    return pos - text;
   2138  }
   2139 
   2140  // We use a fast two-character-wide search in Matcher below, so we need to
   2141  // validate that pat[1] isn't outside the latin1 range up front if the
   2142  // sizes are different.
   2143  if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[1] > 0xff) {
   2144    return -1;
   2145  }
   2146 
   2147  /*
   2148   * If the text or pattern string is short, BMH will be more expensive than
   2149   * the basic linear scan due to initialization cost and a more complex loop
   2150   * body. While the correct threshold is input-dependent, we can make a few
   2151   * conservative observations:
   2152   *  - When |textLen| is "big enough", the initialization time will be
   2153   *    proportionally small, so the worst-case slowdown is minimized.
   2154   *  - When |patLen| is "too small", even the best case for BMH will be
   2155   *    slower than a simple scan for large |textLen| due to the more complex
   2156   *    loop body of BMH.
   2157   * From this, the values for "big enough" and "too small" are determined
   2158   * empirically. See bug 526348.
   2159   */
   2160  if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
   2161    int index = BoyerMooreHorspool(text, textLen, pat, patLen);
   2162    if (index != sBMHBadPattern) {
   2163      return index;
   2164    }
   2165  }
   2166 
   2167  /*
   2168   * For big patterns with large potential overlap we want the SIMD-optimized
   2169   * speed of memcmp. For small patterns, a simple loop is faster. We also can't
   2170   * use memcmp if one of the strings is TwoByte and the other is Latin-1.
   2171   */
   2172  return (patLen > 128 && std::is_same_v<TextChar, PatChar>)
   2173             ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
   2174                   text, textLen, pat, patLen)
   2175             : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
   2176                   text, textLen, pat, patLen);
   2177 }
   2178 
   2179 static int32_t StringMatch(const JSLinearString* text,
   2180                           const JSLinearString* pat, uint32_t start = 0) {
   2181  MOZ_ASSERT(start <= text->length());
   2182  uint32_t textLen = text->length() - start;
   2183  uint32_t patLen = pat->length();
   2184 
   2185  int match;
   2186  AutoCheckCannotGC nogc;
   2187  if (text->hasLatin1Chars()) {
   2188    const Latin1Char* textChars = text->latin1Chars(nogc) + start;
   2189    if (pat->hasLatin1Chars()) {
   2190      match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
   2191    } else {
   2192      match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
   2193    }
   2194  } else {
   2195    const char16_t* textChars = text->twoByteChars(nogc) + start;
   2196    if (pat->hasLatin1Chars()) {
   2197      match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
   2198    } else {
   2199      match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
   2200    }
   2201  }
   2202 
   2203  return (match == -1) ? -1 : start + match;
   2204 }
   2205 
   2206 static const size_t sRopeMatchThresholdRatioLog2 = 4;
   2207 
   2208 int js::StringFindPattern(const JSLinearString* text, const JSLinearString* pat,
   2209                          size_t start) {
   2210  return StringMatch(text, pat, start);
   2211 }
   2212 
   2213 using LinearStringVector = Vector<JSLinearString*, 16, SystemAllocPolicy>;
   2214 
   2215 template <typename TextChar, typename PatChar>
   2216 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
   2217                         LinearStringVector& strings, const PatChar* pat,
   2218                         size_t patLen) {
   2219  /* Absolute offset from the beginning of the logical text string. */
   2220  int pos = 0;
   2221 
   2222  for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
   2223       ++outerp) {
   2224    /* Try to find a match within 'outer'. */
   2225    JSLinearString* outer = *outerp;
   2226    const TextChar* chars = outer->chars<TextChar>(nogc);
   2227    size_t len = outer->length();
   2228    int matchResult = StringMatch(chars, len, pat, patLen);
   2229    if (matchResult != -1) {
   2230      /* Matched! */
   2231      return pos + matchResult;
   2232    }
   2233 
   2234    /* Try to find a match starting in 'outer' and running into other nodes. */
   2235    const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
   2236    const TextChar* const textend = chars + len;
   2237    const PatChar p0 = *pat;
   2238    const PatChar* const p1 = pat + 1;
   2239    const PatChar* const patend = pat + patLen;
   2240    for (const TextChar* t = text; t != textend;) {
   2241      if (*t++ != p0) {
   2242        continue;
   2243      }
   2244 
   2245      JSLinearString** innerp = outerp;
   2246      const TextChar* ttend = textend;
   2247      const TextChar* tt = t;
   2248      for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
   2249        while (tt == ttend) {
   2250          if (++innerp == strings.end()) {
   2251            return -1;
   2252          }
   2253 
   2254          JSLinearString* inner = *innerp;
   2255          tt = inner->chars<TextChar>(nogc);
   2256          ttend = tt + inner->length();
   2257        }
   2258        if (*pp != *tt) {
   2259          goto break_continue;
   2260        }
   2261      }
   2262 
   2263      /* Matched! */
   2264      return pos + (t - chars) - 1; /* -1 because of *t++ above */
   2265 
   2266    break_continue:;
   2267    }
   2268 
   2269    pos += len;
   2270  }
   2271 
   2272  return -1;
   2273 }
   2274 
   2275 /*
   2276 * RopeMatch takes the text to search and the pattern to search for in the text.
   2277 * RopeMatch returns false on OOM and otherwise returns the match index through
   2278 * the 'match' outparam (-1 for not found).
   2279 */
   2280 static bool RopeMatch(JSContext* cx, JSRope* text, const JSLinearString* pat,
   2281                      int* match) {
   2282  uint32_t patLen = pat->length();
   2283  if (patLen == 0) {
   2284    *match = 0;
   2285    return true;
   2286  }
   2287  if (text->length() < patLen) {
   2288    *match = -1;
   2289    return true;
   2290  }
   2291 
   2292  /*
   2293   * List of leaf nodes in the rope. If we run out of memory when trying to
   2294   * append to this list, we can still fall back to StringMatch, so use the
   2295   * system allocator so we don't report OOM in that case.
   2296   */
   2297  LinearStringVector strings;
   2298 
   2299  /*
   2300   * We don't want to do rope matching if there is a poor node-to-char ratio,
   2301   * since this means spending a lot of time in the match loop below. We also
   2302   * need to build the list of leaf nodes. Do both here: iterate over the
   2303   * nodes so long as there are not too many.
   2304   *
   2305   * We also don't use rope matching if the rope contains both Latin-1 and
   2306   * TwoByte nodes, to simplify the match algorithm.
   2307   */
   2308  {
   2309    size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
   2310    StringSegmentRange r(cx);
   2311    if (!r.init(text)) {
   2312      return false;
   2313    }
   2314 
   2315    bool textIsLatin1 = text->hasLatin1Chars();
   2316    while (!r.empty()) {
   2317      if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
   2318          !strings.append(r.front())) {
   2319        JSLinearString* linear = text->ensureLinear(cx);
   2320        if (!linear) {
   2321          return false;
   2322        }
   2323 
   2324        *match = StringMatch(linear, pat);
   2325        return true;
   2326      }
   2327      if (!r.popFront()) {
   2328        return false;
   2329      }
   2330    }
   2331  }
   2332 
   2333  AutoCheckCannotGC nogc;
   2334  if (text->hasLatin1Chars()) {
   2335    if (pat->hasLatin1Chars()) {
   2336      *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
   2337                                         patLen);
   2338    } else {
   2339      *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
   2340                                         patLen);
   2341    }
   2342  } else {
   2343    if (pat->hasLatin1Chars()) {
   2344      *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
   2345                                       patLen);
   2346    } else {
   2347      *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
   2348                                       patLen);
   2349    }
   2350  }
   2351 
   2352  return true;
   2353 }
   2354 
   2355 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
   2356    JSContext* cx, const CallArgs& args) {
   2357  // Only call IsRegExp if the first argument is definitely an object, so we
   2358  // don't pay the cost of an additional function call in the common case.
   2359  if (args.length() == 0 || !args[0].isObject()) {
   2360    return true;
   2361  }
   2362 
   2363  bool isRegExp;
   2364  if (!IsRegExp(cx, args[0], &isRegExp)) {
   2365    return false;
   2366  }
   2367 
   2368  if (isRegExp) {
   2369    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   2370                              JSMSG_INVALID_ARG_TYPE, "first", "",
   2371                              "Regular Expression");
   2372    return false;
   2373  }
   2374  return true;
   2375 }
   2376 
   2377 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b
   2378 // 22.1.3.8 String.prototype.includes ( searchString [ , position ] )
   2379 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
   2380  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "includes");
   2381  CallArgs args = CallArgsFromVp(argc, vp);
   2382 
   2383  // Steps 1-2.
   2384  RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv()));
   2385  if (!str) {
   2386    return false;
   2387  }
   2388 
   2389  // Steps 3-4.
   2390  if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
   2391    return false;
   2392  }
   2393 
   2394  // Step 5.
   2395  Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0));
   2396  if (!searchStr) {
   2397    return false;
   2398  }
   2399 
   2400  // Steps 6-9.
   2401  uint32_t start = 0;
   2402  if (args.hasDefined(1)) {
   2403    if (!ToClampedStringIndex(cx, args[1], str->length(), &start)) {
   2404      return false;
   2405    }
   2406  }
   2407 
   2408  // Steps 10-12.
   2409  JSLinearString* text = str->ensureLinear(cx);
   2410  if (!text) {
   2411    return false;
   2412  }
   2413 
   2414  args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
   2415  return true;
   2416 }
   2417 
   2418 bool js::StringIncludes(JSContext* cx, HandleString string,
   2419                        HandleString searchString, bool* result) {
   2420  JSLinearString* text = string->ensureLinear(cx);
   2421  if (!text) {
   2422    return false;
   2423  }
   2424 
   2425  JSLinearString* searchStr = searchString->ensureLinear(cx);
   2426  if (!searchStr) {
   2427    return false;
   2428  }
   2429 
   2430  *result = StringMatch(text, searchStr, 0) != -1;
   2431  return true;
   2432 }
   2433 
   2434 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b
   2435 // 22.1.3.9 String.prototype.indexOf ( searchString [ , position ] )
   2436 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
   2437  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "indexOf");
   2438  CallArgs args = CallArgsFromVp(argc, vp);
   2439 
   2440  // Steps 1-2.
   2441  RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv()));
   2442  if (!str) {
   2443    return false;
   2444  }
   2445 
   2446  // Step 3.
   2447  Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0));
   2448  if (!searchStr) {
   2449    return false;
   2450  }
   2451 
   2452  // Steps 4-7.
   2453  uint32_t start = 0;
   2454  if (args.hasDefined(1)) {
   2455    if (!ToClampedStringIndex(cx, args[1], str->length(), &start)) {
   2456      return false;
   2457    }
   2458  }
   2459 
   2460  if (str == searchStr) {
   2461    // AngularJS often invokes "false".indexOf("false"). This check should
   2462    // be cheap enough to not hurt anything else.
   2463    args.rval().setInt32(start == 0 ? 0 : -1);
   2464    return true;
   2465  }
   2466 
   2467  // Steps 8-10.
   2468  JSLinearString* text = str->ensureLinear(cx);
   2469  if (!text) {
   2470    return false;
   2471  }
   2472 
   2473  args.rval().setInt32(StringMatch(text, searchStr, start));
   2474  return true;
   2475 }
   2476 
   2477 bool js::StringIndexOf(JSContext* cx, HandleString string,
   2478                       HandleString searchString, int32_t* result) {
   2479  if (string == searchString) {
   2480    *result = 0;
   2481    return true;
   2482  }
   2483 
   2484  JSLinearString* text = string->ensureLinear(cx);
   2485  if (!text) {
   2486    return false;
   2487  }
   2488 
   2489  JSLinearString* searchStr = searchString->ensureLinear(cx);
   2490  if (!searchStr) {
   2491    return false;
   2492  }
   2493 
   2494  *result = StringMatch(text, searchStr, 0);
   2495  return true;
   2496 }
   2497 
   2498 template <typename TextChar, typename PatChar>
   2499 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
   2500                               const PatChar* pat, size_t patLen,
   2501                               size_t start) {
   2502  MOZ_ASSERT(patLen > 0);
   2503  MOZ_ASSERT(patLen <= textLen);
   2504  MOZ_ASSERT(start <= textLen - patLen);
   2505 
   2506  const PatChar p0 = *pat;
   2507  const PatChar* patNext = pat + 1;
   2508  const PatChar* patEnd = pat + patLen;
   2509 
   2510  for (const TextChar* t = text + start; t >= text; --t) {
   2511    if (*t == p0) {
   2512      const TextChar* t1 = t + 1;
   2513      for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
   2514        if (*t1 != *p1) {
   2515          goto break_continue;
   2516        }
   2517      }
   2518 
   2519      return static_cast<int32_t>(t - text);
   2520    }
   2521  break_continue:;
   2522  }
   2523 
   2524  return -1;
   2525 }
   2526 
   2527 static int32_t LastIndexOf(const JSLinearString* text,
   2528                           const JSLinearString* searchStr, size_t start) {
   2529  AutoCheckCannotGC nogc;
   2530 
   2531  size_t len = text->length();
   2532  size_t searchLen = searchStr->length();
   2533 
   2534  if (text->hasLatin1Chars()) {
   2535    const Latin1Char* textChars = text->latin1Chars(nogc);
   2536    if (searchStr->hasLatin1Chars()) {
   2537      return LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
   2538                             searchLen, start);
   2539    }
   2540    return LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
   2541                           searchLen, start);
   2542  }
   2543 
   2544  const char16_t* textChars = text->twoByteChars(nogc);
   2545  if (searchStr->hasLatin1Chars()) {
   2546    return LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
   2547                           searchLen, start);
   2548  }
   2549  return LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
   2550                         searchLen, start);
   2551 }
   2552 
   2553 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b
   2554 // 22.1.3.11 String.prototype.lastIndexOf ( searchString [ , position ] )
   2555 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
   2556  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "lastIndexOf");
   2557  CallArgs args = CallArgsFromVp(argc, vp);
   2558 
   2559  // Steps 1-2.
   2560  RootedString str(cx,
   2561                   ToStringForStringFunction(cx, "lastIndexOf", args.thisv()));
   2562  if (!str) {
   2563    return false;
   2564  }
   2565 
   2566  // Step 3.
   2567  Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0));
   2568  if (!searchStr) {
   2569    return false;
   2570  }
   2571 
   2572  // Step 7.
   2573  size_t len = str->length();
   2574 
   2575  // Step 8.
   2576  size_t searchLen = searchStr->length();
   2577 
   2578  // Steps 4-6 and 9.
   2579  int start = len - searchLen;  // Start searching here
   2580  if (args.hasDefined(1)) {
   2581    if (args[1].isInt32()) {
   2582      int i = args[1].toInt32();
   2583      if (i <= 0) {
   2584        start = 0;
   2585      } else if (i < start) {
   2586        start = i;
   2587      }
   2588    } else {
   2589      double d;
   2590      if (!ToNumber(cx, args[1], &d)) {
   2591        return false;
   2592      }
   2593      if (!std::isnan(d)) {
   2594        d = JS::ToInteger(d);
   2595        if (d <= 0) {
   2596          start = 0;
   2597        } else if (d < start) {
   2598          start = int(d);
   2599        }
   2600      }
   2601    }
   2602  }
   2603 
   2604  if (str == searchStr) {
   2605    args.rval().setInt32(0);
   2606    return true;
   2607  }
   2608 
   2609  if (searchLen > len) {
   2610    args.rval().setInt32(-1);
   2611    return true;
   2612  }
   2613 
   2614  if (searchLen == 0) {
   2615    args.rval().setInt32(start);
   2616    return true;
   2617  }
   2618  MOZ_ASSERT(0 <= start && size_t(start) < len);
   2619 
   2620  JSLinearString* text = str->ensureLinear(cx);
   2621  if (!text) {
   2622    return false;
   2623  }
   2624 
   2625  // Step 10-12.
   2626  args.rval().setInt32(LastIndexOf(text, searchStr, start));
   2627  return true;
   2628 }
   2629 
   2630 bool js::StringLastIndexOf(JSContext* cx, HandleString string,
   2631                           HandleString searchString, int32_t* result) {
   2632  if (string == searchString) {
   2633    *result = 0;
   2634    return true;
   2635  }
   2636 
   2637  size_t len = string->length();
   2638  size_t searchLen = searchString->length();
   2639 
   2640  if (searchLen > len) {
   2641    *result = -1;
   2642    return true;
   2643  }
   2644 
   2645  MOZ_ASSERT(len >= searchLen);
   2646  size_t start = len - searchLen;
   2647 
   2648  if (searchLen == 0) {
   2649    *result = start;
   2650    return true;
   2651  }
   2652  MOZ_ASSERT(start < len);
   2653 
   2654  JSLinearString* text = string->ensureLinear(cx);
   2655  if (!text) {
   2656    return false;
   2657  }
   2658 
   2659  JSLinearString* searchStr = searchString->ensureLinear(cx);
   2660  if (!searchStr) {
   2661    return false;
   2662  }
   2663 
   2664  *result = LastIndexOf(text, searchStr, start);
   2665  return true;
   2666 }
   2667 
   2668 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b
   2669 // 22.1.3.24 String.prototype.startsWith ( searchString [ , position ] )
   2670 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
   2671  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "startsWith");
   2672  CallArgs args = CallArgsFromVp(argc, vp);
   2673 
   2674  // Steps 1-2.
   2675  RootedString str(cx,
   2676                   ToStringForStringFunction(cx, "startsWith", args.thisv()));
   2677  if (!str) {
   2678    return false;
   2679  }
   2680 
   2681  // Steps 3-4.
   2682  if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
   2683    return false;
   2684  }
   2685 
   2686  // Step 5.
   2687  Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0));
   2688  if (!searchStr) {
   2689    return false;
   2690  }
   2691 
   2692  // Step 6.
   2693  uint32_t textLen = str->length();
   2694 
   2695  // Steps 7-8.
   2696  uint32_t start = 0;
   2697  if (args.hasDefined(1)) {
   2698    if (!ToClampedStringIndex(cx, args[1], textLen, &start)) {
   2699      return false;
   2700    }
   2701  }
   2702 
   2703  // Step 9.
   2704  uint32_t searchLen = searchStr->length();
   2705 
   2706  // Step 12.
   2707  if (searchLen + start < searchLen || searchLen + start > textLen) {
   2708    args.rval().setBoolean(false);
   2709    return true;
   2710  }
   2711 
   2712  // Steps 10-11 and 13-15.
   2713  JSLinearString* text = str->ensureLinear(cx);
   2714  if (!text) {
   2715    return false;
   2716  }
   2717 
   2718  args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
   2719  return true;
   2720 }
   2721 
   2722 bool js::StringStartsWith(JSContext* cx, HandleString string,
   2723                          HandleString searchString, bool* result) {
   2724  if (searchString->length() > string->length()) {
   2725    *result = false;
   2726    return true;
   2727  }
   2728 
   2729  JSLinearString* str = string->ensureLinear(cx);
   2730  if (!str) {
   2731    return false;
   2732  }
   2733 
   2734  JSLinearString* searchStr = searchString->ensureLinear(cx);
   2735  if (!searchStr) {
   2736    return false;
   2737  }
   2738 
   2739  *result = HasSubstringAt(str, searchStr, 0);
   2740  return true;
   2741 }
   2742 
   2743 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b
   2744 // 22.1.3.7 String.prototype.endsWith ( searchString [ , endPosition ] )
   2745 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
   2746  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "endsWith");
   2747  CallArgs args = CallArgsFromVp(argc, vp);
   2748 
   2749  // Steps 1-2.
   2750  RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv()));
   2751  if (!str) {
   2752    return false;
   2753  }
   2754 
   2755  // Steps 3-4.
   2756  if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
   2757    return false;
   2758  }
   2759 
   2760  // Step 5.
   2761  Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0));
   2762  if (!searchStr) {
   2763    return false;
   2764  }
   2765 
   2766  // Step 6.
   2767  uint32_t textLen = str->length();
   2768 
   2769  // Steps 7-8.
   2770  uint32_t end = textLen;
   2771  if (args.hasDefined(1)) {
   2772    if (!ToClampedStringIndex(cx, args[1], textLen, &end)) {
   2773      return false;
   2774    }
   2775  }
   2776 
   2777  // Step 9.
   2778  uint32_t searchLen = searchStr->length();
   2779 
   2780  // Step 12 (reordered).
   2781  if (searchLen > end) {
   2782    args.rval().setBoolean(false);
   2783    return true;
   2784  }
   2785 
   2786  // Step 11.
   2787  uint32_t start = end - searchLen;
   2788 
   2789  // Steps 10 and 13-15.
   2790  JSLinearString* text = str->ensureLinear(cx);
   2791  if (!text) {
   2792    return false;
   2793  }
   2794 
   2795  args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
   2796  return true;
   2797 }
   2798 
   2799 bool js::StringEndsWith(JSContext* cx, HandleString string,
   2800                        HandleString searchString, bool* result) {
   2801  if (searchString->length() > string->length()) {
   2802    *result = false;
   2803    return true;
   2804  }
   2805 
   2806  JSLinearString* str = string->ensureLinear(cx);
   2807  if (!str) {
   2808    return false;
   2809  }
   2810 
   2811  JSLinearString* searchStr = searchString->ensureLinear(cx);
   2812  if (!searchStr) {
   2813    return false;
   2814  }
   2815 
   2816  uint32_t start = str->length() - searchStr->length();
   2817 
   2818  *result = HasSubstringAt(str, searchStr, start);
   2819  return true;
   2820 }
   2821 
   2822 template <typename CharT>
   2823 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
   2824                       size_t length, size_t* pBegin, size_t* pEnd) {
   2825  size_t begin = 0, end = length;
   2826 
   2827  if (trimStart) {
   2828    while (begin < length && unicode::IsSpace(chars[begin])) {
   2829      ++begin;
   2830    }
   2831  }
   2832 
   2833  if (trimEnd) {
   2834    while (end > begin && unicode::IsSpace(chars[end - 1])) {
   2835      --end;
   2836    }
   2837  }
   2838 
   2839  *pBegin = begin;
   2840  *pEnd = end;
   2841 }
   2842 
   2843 static JSLinearString* TrimString(JSContext* cx, JSString* str, bool trimStart,
   2844                                  bool trimEnd) {
   2845  JSLinearString* linear = str->ensureLinear(cx);
   2846  if (!linear) {
   2847    return nullptr;
   2848  }
   2849 
   2850  size_t length = linear->length();
   2851  size_t begin, end;
   2852  if (linear->hasLatin1Chars()) {
   2853    AutoCheckCannotGC nogc;
   2854    TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
   2855               &end);
   2856  } else {
   2857    AutoCheckCannotGC nogc;
   2858    TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
   2859               &end);
   2860  }
   2861 
   2862  return NewDependentString(cx, linear, begin, end - begin);
   2863 }
   2864 
   2865 JSString* js::StringTrim(JSContext* cx, HandleString string) {
   2866  return TrimString(cx, string, true, true);
   2867 }
   2868 
   2869 JSString* js::StringTrimStart(JSContext* cx, HandleString string) {
   2870  return TrimString(cx, string, true, false);
   2871 }
   2872 
   2873 JSString* js::StringTrimEnd(JSContext* cx, HandleString string) {
   2874  return TrimString(cx, string, false, true);
   2875 }
   2876 
   2877 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName,
   2878                       bool trimStart, bool trimEnd) {
   2879  JSString* str = ToStringForStringFunction(cx, funName, args.thisv());
   2880  if (!str) {
   2881    return false;
   2882  }
   2883 
   2884  JSLinearString* result = TrimString(cx, str, trimStart, trimEnd);
   2885  if (!result) {
   2886    return false;
   2887  }
   2888 
   2889  args.rval().setString(result);
   2890  return true;
   2891 }
   2892 
   2893 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) {
   2894  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trim");
   2895  CallArgs args = CallArgsFromVp(argc, vp);
   2896  return TrimString(cx, args, "trim", true, true);
   2897 }
   2898 
   2899 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
   2900  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trimStart");
   2901  CallArgs args = CallArgsFromVp(argc, vp);
   2902  return TrimString(cx, args, "trimStart", true, false);
   2903 }
   2904 
   2905 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
   2906  AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trimEnd");
   2907  CallArgs args = CallArgsFromVp(argc, vp);
   2908  return TrimString(cx, args, "trimEnd", false, true);
   2909 }
   2910 
   2911 // Utility for building a rope (lazy concatenation) of strings.
   2912 class RopeBuilder {
   2913  JSContext* cx;
   2914  RootedString res;
   2915 
   2916  RopeBuilder(const RopeBuilder& other) = delete;
   2917  void operator=(const RopeBuilder& other) = delete;
   2918 
   2919 public:
   2920  explicit RopeBuilder(JSContext* cx)
   2921      : cx(cx), res(cx, cx->runtime()->emptyString) {}
   2922 
   2923  inline bool append(HandleString str) {
   2924    res = ConcatStrings<CanGC>(cx, res, str);
   2925    return !!res;
   2926  }
   2927 
   2928  inline JSString* result() { return res; }
   2929 };
   2930 
   2931 namespace {
   2932 
   2933 template <typename CharT>
   2934 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
   2935  if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
   2936    uint32_t dollarIndex = p - chars;
   2937    MOZ_ASSERT(dollarIndex < length);
   2938    return dollarIndex;
   2939  }
   2940  return UINT32_MAX;
   2941 }
   2942 
   2943 } /* anonymous namespace */
   2944 
   2945 /*
   2946 * Constructs a result string that looks like:
   2947 *
   2948 *      newstring = string[:matchStart] + repstr + string[matchEnd:]
   2949 */
   2950 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
   2951                                      Handle<JSLinearString*> repstr,
   2952                                      size_t matchStart, size_t patternLength) {
   2953  size_t matchEnd = matchStart + patternLength;
   2954 
   2955  RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
   2956  if (!resultStr) {
   2957    return nullptr;
   2958  }
   2959 
   2960  resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
   2961  if (!resultStr) {
   2962    return nullptr;
   2963  }
   2964 
   2965  MOZ_ASSERT(textstr->length() >= matchEnd);
   2966  RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
   2967                                           textstr->length() - matchEnd));
   2968  if (!rest) {
   2969    return nullptr;
   2970  }
   2971 
   2972  return ConcatStrings<CanGC>(cx, resultStr, rest);
   2973 }
   2974 
   2975 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
   2976                                          Handle<JSLinearString*> repstr,
   2977                                          size_t match, size_t patternLength) {
   2978  MOZ_ASSERT(textstr->isRope());
   2979 
   2980  size_t matchEnd = match + patternLength;
   2981 
   2982  /*
   2983   * If we are replacing over a rope, avoid flattening it by iterating
   2984   * through it, building a new rope.
   2985   */
   2986  StringSegmentRange r(cx);
   2987  if (!r.init(textstr)) {
   2988    return nullptr;
   2989  }
   2990 
   2991  RopeBuilder builder(cx);
   2992 
   2993  /*
   2994   * Special case when the pattern string is '', which matches to the
   2995   * head of the string and doesn't overlap with any component of the rope.
   2996   */
   2997  if (patternLength == 0) {
   2998    MOZ_ASSERT(match == 0);
   2999    if (!builder.append(repstr)) {
   3000      return nullptr;
   3001    }
   3002  }
   3003 
   3004  size_t pos = 0;
   3005  while (!r.empty()) {
   3006    RootedString str(cx, r.front());
   3007    size_t len = str->length();
   3008    size_t strEnd = pos + len;
   3009    if (pos < matchEnd && strEnd > match) {
   3010      /*
   3011       * We need to special-case any part of the rope that overlaps
   3012       * with the replacement string.
   3013       */
   3014      if (match >= pos) {
   3015        /*
   3016         * If this part of the rope overlaps with the left side of
   3017         * the pattern, then it must be the only one to overlap with
   3018         * the first character in the pattern, so we include the
   3019         * replacement string here.
   3020         */
   3021        RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
   3022        if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
   3023          return nullptr;
   3024        }
   3025      }
   3026 
   3027      /*
   3028       * If str runs off the end of the matched string, append the
   3029       * last part of str.
   3030       */
   3031      if (strEnd > matchEnd) {
   3032        RootedString rightSide(
   3033            cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
   3034        if (!rightSide || !builder.append(rightSide)) {
   3035          return nullptr;
   3036        }
   3037      }
   3038    } else {
   3039      if (!builder.append(str)) {
   3040        return nullptr;
   3041      }
   3042    }
   3043    pos += str->length();
   3044    if (!r.popFront()) {
   3045      return nullptr;
   3046    }
   3047  }
   3048 
   3049  return builder.result();
   3050 }
   3051 
   3052 template <typename CharT>
   3053 static bool AppendDollarReplacement(StringBuilder& newReplaceChars,
   3054                                    size_t firstDollarIndex, size_t matchStart,
   3055                                    size_t matchLimit,
   3056                                    const JSLinearString* text,
   3057                                    const CharT* repChars, size_t repLength) {
   3058  MOZ_ASSERT(firstDollarIndex < repLength);
   3059  MOZ_ASSERT(matchStart <= matchLimit);
   3060  MOZ_ASSERT(matchLimit <= text->length());
   3061 
   3062  // Move the pre-dollar chunk in bulk.
   3063  if (!newReplaceChars.append(repChars, firstDollarIndex)) {
   3064    return false;
   3065  }
   3066 
   3067  // Move the rest char-by-char, interpreting dollars as we encounter them.
   3068  const CharT* repLimit = repChars + repLength;
   3069  for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
   3070    if (*it != '$' || it == repLimit - 1) {
   3071      if (!newReplaceChars.append(*it)) {
   3072        return false;
   3073      }
   3074      continue;
   3075    }
   3076 
   3077    switch (*(it + 1)) {
   3078      case '$':
   3079        // Eat one of the dollars.
   3080        if (!newReplaceChars.append(*it)) {
   3081          return false;
   3082        }
   3083        break;
   3084      case '&':
   3085        if (!newReplaceChars.appendSubstring(text, matchStart,
   3086                                             matchLimit - matchStart)) {
   3087          return false;
   3088        }
   3089        break;
   3090      case '`':
   3091        if (!newReplaceChars.appendSubstring(text, 0, matchStart)) {
   3092          return false;
   3093        }
   3094        break;
   3095      case '\'':
   3096        if (!newReplaceChars.appendSubstring(text, matchLimit,
   3097                                             text->length() - matchLimit)) {
   3098          return false;
   3099        }
   3100        break;
   3101      default:
   3102        // The dollar we saw was not special (no matter what its mother told
   3103        // it).
   3104        if (!newReplaceChars.append(*it)) {
   3105          return false;
   3106        }
   3107        continue;
   3108    }
   3109    ++it;  // We always eat an extra char in the above switch.
   3110  }
   3111 
   3112  return true;
   3113 }
   3114 
   3115 /*
   3116 * Perform a linear-scan dollar substitution on the replacement text.
   3117 */
   3118 static JSLinearString* InterpretDollarReplacement(
   3119    JSContext* cx, HandleString textstrArg, Handle<JSLinearString*> repstr,
   3120    uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
   3121  Rooted<JSLinearString*> textstr(cx, textstrArg->ensureLinear(cx));
   3122  if (!textstr) {
   3123    return nullptr;
   3124  }
   3125 
   3126  size_t matchLimit = matchStart + patternLength;
   3127 
   3128  /*
   3129   * Most probably:
   3130   *
   3131   *      len(newstr) >= len(orig) - len(match) + len(replacement)
   3132   *
   3133   * Note that dollar vars _could_ make the resulting text smaller than this.
   3134   */
   3135  JSStringBuilder newReplaceChars(cx);
   3136  if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) {
   3137    return nullptr;
   3138  }
   3139 
   3140  if (!newReplaceChars.reserve(textstr->length() - patternLength +
   3141                               repstr->length())) {
   3142    return nullptr;
   3143  }
   3144 
   3145  bool res;
   3146  if (repstr->hasLatin1Chars()) {
   3147    AutoCheckCannotGC nogc;
   3148    res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
   3149                                  matchLimit, textstr,
   3150                                  repstr->latin1Chars(nogc), repstr->length());
   3151  } else {
   3152    AutoCheckCannotGC nogc;
   3153    res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
   3154                                  matchLimit, textstr,
   3155                                  repstr->twoByteChars(nogc), repstr->length());
   3156  }
   3157  if (!res) {
   3158    return nullptr;
   3159  }
   3160 
   3161  return newReplaceChars.finishString();
   3162 }
   3163 
   3164 template <typename StrChar, typename RepChar>
   3165 static bool StrFlatReplaceGlobal(JSContext* cx, const JSLinearString* str,
   3166                                 const JSLinearString* pat,
   3167                                 const JSLinearString* rep, StringBuilder& sb) {
   3168  MOZ_ASSERT(str->length() > 0);
   3169 
   3170  AutoCheckCannotGC nogc;
   3171  const StrChar* strChars = str->chars<StrChar>(nogc);
   3172  const RepChar* repChars = rep->chars<RepChar>(nogc);
   3173 
   3174  // The pattern is empty, so we interleave the replacement string in-between
   3175  // each character.
   3176  if (!pat->length()) {
   3177    CheckedInt<uint32_t> strLength(str->length());
   3178    CheckedInt<uint32_t> repLength(rep->length());
   3179    CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
   3180    if (!length.isValid()) {
   3181      ReportAllocationOverflow(cx);
   3182      return false;
   3183    }
   3184 
   3185    if (!sb.reserve(length.value())) {
   3186      return false;
   3187    }
   3188 
   3189    for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
   3190      sb.infallibleAppend(*strChars);
   3191      sb.infallibleAppend(repChars, rep->length());
   3192    }
   3193    sb.infallibleAppend(*strChars);
   3194    return true;
   3195  }
   3196 
   3197  // If it's true, we are sure that the result's length is, at least, the same
   3198  // length as |str->length()|.
   3199  if (rep->length() >= pat->length()) {
   3200    if (!sb.reserve(str->length())) {
   3201      return false;
   3202    }
   3203  }
   3204 
   3205  uint32_t start = 0;
   3206  for (;;) {
   3207    int match = StringMatch(str, pat, start);
   3208    if (match < 0) {
   3209      break;
   3210    }
   3211    if (!sb.append(strChars + start, match - start)) {
   3212      return false;
   3213    }
   3214    if (!sb.append(repChars, rep->length())) {
   3215      return false;
   3216    }
   3217    start = match + pat->length();
   3218  }
   3219 
   3220  if (!sb.append(strChars + start, str->length() - start)) {
   3221    return false;
   3222  }
   3223 
   3224  return true;
   3225 }
   3226 
   3227 // This is identical to "str.split(pattern).join(replacement)" except that we
   3228 // do some deforestation optimization in Ion.
   3229 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string,
   3230                                      HandleString pattern,
   3231                                      HandleString replacement) {
   3232  MOZ_ASSERT(string);
   3233  MOZ_ASSERT(pattern);
   3234  MOZ_ASSERT(replacement);
   3235 
   3236  if (!string->length()) {
   3237    return string;
   3238  }
   3239 
   3240  Rooted<JSLinearString*> linearRepl(cx, replacement->ensureLinear(cx));
   3241  if (!linearRepl) {
   3242    return nullptr;
   3243  }
   3244 
   3245  Rooted<JSLinearString*> linearPat(cx, pattern->ensureLinear(cx));
   3246  if (!linearPat) {
   3247    return nullptr;
   3248  }
   3249 
   3250  Rooted<JSLinearString*> linearStr(cx, string->ensureLinear(cx));
   3251  if (!linearStr) {
   3252    return nullptr;
   3253  }
   3254 
   3255  JSStringBuilder sb(cx);
   3256  if (linearStr->hasTwoByteChars()) {
   3257    if (!sb.ensureTwoByteChars()) {
   3258      return nullptr;
   3259    }
   3260    if (linearRepl->hasTwoByteChars()) {
   3261      if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
   3262                                                    linearRepl, sb)) {
   3263        return nullptr;
   3264      }
   3265    } else {
   3266      if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
   3267                                                      linearRepl, sb)) {
   3268        return nullptr;
   3269      }
   3270    }
   3271  } else {
   3272    if (linearRepl->hasTwoByteChars()) {
   3273      if (!sb.ensureTwoByteChars()) {
   3274        return nullptr;
   3275      }
   3276      if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
   3277                                                      linearRepl, sb)) {
   3278        return nullptr;
   3279      }
   3280    } else {
   3281      if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
   3282              cx, linearStr, linearPat, linearRepl, sb)) {
   3283        return nullptr;
   3284      }
   3285    }
   3286  }
   3287 
   3288  return sb.finishString();
   3289 }
   3290 
   3291 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
   3292                                     HandleString pattern,
   3293                                     HandleString replacement) {
   3294  Rooted<JSLinearString*> pat(cx, pattern->ensureLinear(cx));
   3295  if (!pat) {
   3296    return nullptr;
   3297  }
   3298 
   3299  /*
   3300   * |string| could be a rope, so we want to avoid flattening it for as
   3301   * long as possible.
   3302   */
   3303  int32_t match;
   3304  if (string->isRope()) {
   3305    if (!RopeMatch(cx, &string->asRope(), pat, &match)) {
   3306      return nullptr;
   3307    }
   3308  } else {
   3309    match = StringMatch(&string->asLinear(), pat, 0);
   3310  }
   3311 
   3312  if (match < 0) {
   3313    return string;
   3314  }
   3315 
   3316  Rooted<JSLinearString*> repl(cx, replacement->ensureLinear(cx));
   3317  if (!repl) {
   3318    return nullptr;
   3319  }
   3320  uint32_t dollarIndex;
   3321  {
   3322    AutoCheckCannotGC nogc;
   3323    dollarIndex =
   3324        repl->hasLatin1Chars()
   3325            ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
   3326            : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
   3327  }
   3328 
   3329  size_t patternLength = pat->length();
   3330 
   3331  if (dollarIndex != UINT32_MAX) {
   3332    repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
   3333                                      patternLength);
   3334    if (!repl) {
   3335      return nullptr;
   3336    }
   3337  } else if (string->isRope()) {
   3338    return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
   3339  }
   3340  return BuildFlatReplacement(cx, string, repl, match, patternLength);
   3341 }
   3342 
   3343 template <typename StrChar, typename RepChar>
   3344 static bool ReplaceAllInternal(const AutoCheckCannotGC& nogc,
   3345                               const JSLinearString* string,
   3346                               const JSLinearString* searchString,
   3347                               const JSLinearString* replaceString,
   3348                               const int32_t startPosition,
   3349                               JSStringBuilder& result) {
   3350  // Step 7.
   3351  const size_t stringLength = string->length();
   3352  const size_t searchLength = searchString->length();
   3353  const size_t replaceLength = replaceString->length();
   3354 
   3355  MOZ_ASSERT(stringLength > 0);
   3356  MOZ_ASSERT(searchLength > 0);
   3357  MOZ_ASSERT(stringLength >= searchLength);
   3358 
   3359  // Step 12.
   3360  uint32_t endOfLastMatch = 0;
   3361 
   3362  const StrChar* strChars = string->chars<StrChar>(nogc);
   3363  const RepChar* repChars = replaceString->chars<RepChar>(nogc);
   3364 
   3365  uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
   3366 
   3367  // If it's true, we are sure that the result's length is, at least, the same
   3368  // length as |str->length()|.
   3369  if (replaceLength >= searchLength) {
   3370    if (!result.reserve(stringLength)) {
   3371      return false;
   3372    }
   3373  }
   3374 
   3375  int32_t position = startPosition;
   3376  do {
   3377    // Step 14.c.
   3378    // Append the substring before the current match.
   3379    if (!result.append(strChars + endOfLastMatch, position - endOfLastMatch)) {
   3380      return false;
   3381    }
   3382 
   3383    // Steps 14.a-b and 14.d.
   3384    // Append the replacement.
   3385    if (dollarIndex != UINT32_MAX) {
   3386      size_t matchLimit = position + searchLength;
   3387      if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit,
   3388                                   string, repChars, replaceLength)) {
   3389        return false;
   3390      }
   3391    } else {
   3392      if (!result.append(repChars, replaceLength)) {
   3393        return false;
   3394      }
   3395    }
   3396 
   3397    // Step 14.e.
   3398    endOfLastMatch = position + searchLength;
   3399 
   3400    // Step 11.
   3401    // Find the next match.
   3402    position = StringMatch(string, searchString, endOfLastMatch);
   3403  } while (position >= 0);
   3404 
   3405  // Step 15.
   3406  // Append the substring after the last match.
   3407  return result.append(strChars + endOfLastMatch,
   3408                       stringLength - endOfLastMatch);
   3409 }
   3410 
   3411 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
   3412 // Steps 7-16 when functionalReplace is false and searchString is not empty.
   3413 //
   3414 // The steps are quite different, for performance. Loops in steps 11 and 14
   3415 // are fused. GetSubstitution is optimized away when possible.
   3416 template <typename StrChar, typename RepChar>
   3417 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string,
   3418                            const JSLinearString* searchString,
   3419                            const JSLinearString* replaceString) {
   3420  // Step 7 moved into ReplaceAll_internal.
   3421 
   3422  // Step 8 (advanceBy is equal to searchLength when searchLength > 0).
   3423 
   3424  // Step 9 (not needed in this implementation).
   3425 
   3426  // Step 10.
   3427  // Find the first match.
   3428  int32_t position = StringMatch(string, searchString, 0);
   3429 
   3430  // Nothing to replace, so return early.
   3431  if (position < 0) {
   3432    return string;
   3433  }
   3434 
   3435  // Steps 11, 12 moved into ReplaceAll_internal.
   3436 
   3437  // Step 13.
   3438  JSStringBuilder result(cx);
   3439  if constexpr (std::is_same_v<StrChar, char16_t> ||
   3440                std::is_same_v<RepChar, char16_t>) {
   3441    if (!result.ensureTwoByteChars()) {
   3442      return nullptr;
   3443    }
   3444  }
   3445 
   3446  bool internalFailure = false;
   3447  {
   3448    AutoCheckCannotGC nogc;
   3449    internalFailure = !ReplaceAllInternal<StrChar, RepChar>(
   3450        nogc, string, searchString, replaceString, position, result);
   3451  }
   3452  if (internalFailure) {
   3453    return nullptr;
   3454  }
   3455 
   3456  // Step 16.
   3457  return result.finishString();
   3458 }
   3459 
   3460 template <typename StrChar, typename RepChar>
   3461 static bool ReplaceAllInterleaveInternal(const AutoCheckCannotGC& nogc,
   3462                                         JSContext* cx,
   3463                                         const JSLinearString* string,
   3464                                         const JSLinearString* replaceString,
   3465                                         JSStringBuilder& result) {
   3466  // Step 7.
   3467  const size_t stringLength = string->length();
   3468  const size_t replaceLength = replaceString->length();
   3469 
   3470  const StrChar* strChars = string->chars<StrChar>(nogc);
   3471  const RepChar* repChars = replaceString->chars<RepChar>(nogc);
   3472 
   3473  uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
   3474 
   3475  if (dollarIndex != UINT32_MAX) {
   3476    if (!result.reserve(stringLength)) {
   3477      return false;
   3478    }
   3479  } else {
   3480    // Compute the exact result length when no substitutions take place.
   3481    CheckedInt<uint32_t> strLength(stringLength);
   3482    CheckedInt<uint32_t> repLength(replaceLength);
   3483    CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength;
   3484    if (!length.isValid()) {
   3485      ReportAllocationOverflow(cx);
   3486      return false;
   3487    }
   3488 
   3489    if (!result.reserve(length.value())) {
   3490      return false;
   3491    }
   3492  }
   3493 
   3494  auto appendReplacement = [&](size_t match) {
   3495    if (dollarIndex != UINT32_MAX) {
   3496      return AppendDollarReplacement(result, dollarIndex, match, match, string,
   3497                                     repChars, replaceLength);
   3498    }
   3499    return result.append(repChars, replaceLength);
   3500  };
   3501 
   3502  for (size_t index = 0; index < stringLength; index++) {
   3503    // Steps 11, 14.a-b and 14.d.
   3504    // The empty string matches before each character.
   3505    if (!appendReplacement(index)) {
   3506      return false;
   3507    }
   3508 
   3509    // Step 14.c.
   3510    if (!result.append(strChars[index])) {
   3511      return false;
   3512    }
   3513  }
   3514 
   3515  // Steps 11, 14.a-b and 14.d.
   3516  // The empty string also matches at the end of the string.
   3517  return appendReplacement(stringLength);
   3518 
   3519  // Step 15 (not applicable when searchString is the empty string).
   3520 }
   3521 
   3522 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
   3523 // Steps 7-16 when functionalReplace is false and searchString is the empty
   3524 // string.
   3525 //
   3526 // The steps are quite different, for performance. Loops in steps 11 and 14
   3527 // are fused. GetSubstitution is optimized away when possible.
   3528 template <typename StrChar, typename RepChar>
   3529 static JSString* ReplaceAllInterleave(JSContext* cx,
   3530                                      const JSLinearString* string,
   3531                                      const JSLinearString* replaceString) {
   3532  // Step 7 moved into ReplaceAllInterleavedInternal.
   3533 
   3534  // Step 8 (advanceBy is 1 when searchString is the empty string).
   3535 
   3536  // Steps 9-12 (trivial when searchString is the empty string).
   3537 
   3538  // Step 13.
   3539  JSStringBuilder result(cx);
   3540  if constexpr (std::is_same_v<StrChar, char16_t> ||
   3541                std::is_same_v<RepChar, char16_t>) {
   3542    if (!result.ensureTwoByteChars()) {
   3543      return nullptr;
   3544    }
   3545  }
   3546 
   3547  bool internalFailure = false;
   3548  {
   3549    AutoCheckCannotGC nogc;
   3550    internalFailure = !ReplaceAllInterleaveInternal<StrChar, RepChar>(
   3551        nogc, cx, string, replaceString, result);
   3552  }
   3553  if (internalFailure) {
   3554    return nullptr;
   3555  }
   3556 
   3557  // Step 16.
   3558  return result.finishString();
   3559 }
   3560 
   3561 // String.prototype.replaceAll (Stage 3 proposal)
   3562 // https://tc39.es/proposal-string-replaceall/
   3563 //
   3564 // String.prototype.replaceAll ( searchValue, replaceValue )
   3565 //
   3566 // Steps 7-16 when functionalReplace is false.
   3567 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string,
   3568                                        HandleString searchString,
   3569                                        HandleString replaceString) {
   3570  const size_t stringLength = string->length();
   3571  const size_t searchLength = searchString->length();
   3572 
   3573  // Directly return when we're guaranteed to find no match.
   3574  if (searchLength > stringLength) {
   3575    return string;
   3576  }
   3577 
   3578  Rooted<JSLinearString*> str(cx, string->ensureLinear(cx));
   3579  if (!str) {
   3580    return nullptr;
   3581  }
   3582 
   3583  Rooted<JSLinearString*> repl(cx, replaceString->ensureLinear(cx));
   3584  if (!repl) {
   3585    return nullptr;
   3586  }
   3587 
   3588  Rooted<JSLinearString*> search(cx, searchString->ensureLinear(cx));
   3589  if (!search) {
   3590    return nullptr;
   3591  }
   3592 
   3593  // The pattern is empty, so we interleave the replacement string in-between
   3594  // each character.
   3595  if (searchLength == 0) {
   3596    if (str->hasTwoByteChars()) {
   3597      if (repl->hasTwoByteChars()) {
   3598        return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl);
   3599      }
   3600      return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl);
   3601    }
   3602    if (repl->hasTwoByteChars()) {
   3603      return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl);
   3604    }
   3605    return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl);
   3606  }
   3607 
   3608  MOZ_ASSERT(stringLength > 0);
   3609 
   3610  if (str->hasTwoByteChars()) {
   3611    if (repl->hasTwoByteChars()) {
   3612      return ReplaceAll<char16_t, char16_t>(cx, str, search, repl);
   3613    }
   3614    return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl);
   3615  }
   3616  if (repl->hasTwoByteChars()) {
   3617    return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl);
   3618  }
   3619  return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl);
   3620 }
   3621 
   3622 static ArrayObject* SingleElementStringArray(JSContext* cx,
   3623                                             Handle<JSLinearString*> str) {
   3624  ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1);
   3625  if (!array) {
   3626    return nullptr;
   3627  }
   3628  array->setDenseInitializedLength(1);
   3629  array->initDenseElement(0, StringValue(str));
   3630  return array;
   3631 }
   3632 
   3633 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
   3634 static ArrayObject* SplitHelper(JSContext* cx, Handle<JSLinearString*> str,
   3635                                uint32_t limit, Handle<JSLinearString*> sep) {
   3636  size_t strLength = str->length();
   3637  size_t sepLength = sep->length();
   3638  MOZ_ASSERT(sepLength != 0);
   3639 
   3640  // Step 12.
   3641  if (strLength == 0) {
   3642    // Step 12.a.
   3643    int match = StringMatch(str, sep, 0);
   3644 
   3645    // Step 12.b.
   3646    if (match != -1) {
   3647      return NewDenseEmptyArray(cx);
   3648    }
   3649 
   3650    // Steps 12.c-e.
   3651    return SingleElementStringArray(cx, str);
   3652  }
   3653 
   3654  // Step 3 (reordered).
   3655  Rooted<ArrayObject*> substrings(cx, NewDenseEmptyArray(cx));
   3656  if (!substrings) {
   3657    return nullptr;
   3658  }
   3659 
   3660  // Switch to allocating in the tenured heap if we fill the nursery.
   3661  AutoSelectGCHeap gcHeap(cx);
   3662 
   3663  // Step 8 (reordered).
   3664  size_t lastEndIndex = 0;
   3665 
   3666  // Step 13.
   3667  size_t index = 0;
   3668 
   3669  // Step 14.
   3670  while (index != strLength) {
   3671    // Step 14.a.
   3672    int match = StringMatch(str, sep, index);
   3673 
   3674    // Step 14.b.
   3675    //
   3676    // Our match algorithm differs from the spec in that it returns the
   3677    // next index at which a match happens.  If no match happens we're
   3678    // done.
   3679    //
   3680    // But what if the match is at the end of the string (and the string is
   3681    // not empty)?  Per 14.c.i this shouldn't be a match, so we have to
   3682    // specially exclude it.  Thus this case should hold:
   3683    //
   3684    //   var a = "abc".split(/\b/);
   3685    //   assertEq(a.length, 1);
   3686    //   assertEq(a[0], "abc");
   3687    if (match == -1) {
   3688      break;
   3689    }
   3690 
   3691    // Step 14.c.
   3692    size_t endIndex = match + sepLength;
   3693 
   3694    // Step 14.c.i.
   3695    if (endIndex == lastEndIndex) {
   3696      index++;
   3697      continue;
   3698    }
   3699 
   3700    // Step 14.c.ii.
   3701    MOZ_ASSERT(lastEndIndex < endIndex);
   3702    MOZ_ASSERT(sepLength <= strLength);
   3703    MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
   3704 
   3705    // Step 14.c.ii.1.
   3706    size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
   3707    JSString* sub =
   3708        NewDependentString(cx, str, lastEndIndex, subLength, gcHeap);
   3709 
   3710    // Steps 14.c.ii.2-4.
   3711    if (!sub || !NewbornArrayPush(cx, substrings, StringValue(sub))) {
   3712      return nullptr;
   3713    }
   3714 
   3715    // Step 14.c.ii.5.
   3716    if (substrings->length() == limit) {
   3717      return substrings;
   3718    }
   3719 
   3720    // Step 14.c.ii.6.
   3721    index = endIndex;
   3722 
   3723    // Step 14.c.ii.7.
   3724    lastEndIndex = index;
   3725  }
   3726 
   3727  // Step 15.
   3728  size_t subLength = strLength - lastEndIndex;
   3729  JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength, gcHeap);
   3730 
   3731  // Steps 16-17.
   3732  if (!sub || !NewbornArrayPush(cx, substrings, StringValue(sub))) {
   3733    return nullptr;
   3734  }
   3735 
   3736  // Step 18.
   3737  return substrings;
   3738 }
   3739 
   3740 // Fast-path for splitting a string into a character array via split("").
   3741 static ArrayObject* CharSplitHelper(JSContext* cx, Handle<JSLinearString*> str,
   3742                                    uint32_t limit) {
   3743  size_t strLength = str->length();
   3744  if (strLength == 0) {
   3745    return NewDenseEmptyArray(cx);
   3746  }
   3747 
   3748  js::StaticStrings& staticStrings = cx->staticStrings();
   3749  uint32_t resultlen = (limit < strLength ? limit : strLength);
   3750  MOZ_ASSERT(limit > 0 && resultlen > 0,
   3751             "Neither limit nor strLength is zero, so resultlen is greater "
   3752             "than zero.");
   3753 
   3754  Rooted<ArrayObject*> splits(cx, NewDenseFullyAllocatedArray(cx, resultlen));
   3755  if (!splits) {
   3756    return nullptr;
   3757  }
   3758 
   3759  if (str->hasLatin1Chars()) {
   3760    splits->setDenseInitializedLength(resultlen);
   3761 
   3762    JS::AutoCheckCannotGC nogc;
   3763    const Latin1Char* latin1Chars = str->latin1Chars(nogc);
   3764    for (size_t i = 0; i < resultlen; ++i) {
   3765      Latin1Char c = latin1Chars[i];
   3766      MOZ_ASSERT(staticStrings.hasUnit(c));
   3767      splits->initDenseElement(i, StringValue(staticStrings.getUnit(c)));
   3768    }
   3769  } else {
   3770    splits->ensureDenseInitializedLength(0, resultlen);
   3771 
   3772    for (size_t i = 0; i < resultlen; ++i) {
   3773      JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
   3774      if (!sub) {
   3775        return nullptr;
   3776      }
   3777      splits->initDenseElement(i, StringValue(sub));
   3778    }
   3779  }
   3780 
   3781  return splits;
   3782 }
   3783 
   3784 template <typename TextChar>
   3785 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
   3786    JSContext* cx, Handle<JSLinearString*> str, const TextChar* text,
   3787    uint32_t textLen, char16_t patCh) {
   3788  // Count the number of occurrences of patCh within text.
   3789  uint32_t count = 0;
   3790  for (size_t index = 0; index < textLen; index++) {
   3791    if (static_cast<char16_t>(text[index]) == patCh) {
   3792      count++;
   3793    }
   3794  }
   3795 
   3796  // Handle zero-occurrence case - return input string in an array.
   3797  if (count == 0) {
   3798    return SingleElementStringArray(cx, str);
   3799  }
   3800 
   3801  // Create the result array for the substring values.
   3802  Rooted<ArrayObject*> splits(cx, NewDenseFullyAllocatedArray(cx, count + 1));
   3803  if (!splits) {
   3804    return nullptr;
   3805  }
   3806  splits->ensureDenseInitializedLength(0, count + 1);
   3807 
   3808  // Add substrings.
   3809  uint32_t splitsIndex = 0;
   3810  size_t lastEndIndex = 0;
   3811  for (size_t index = 0; index < textLen; index++) {
   3812    if (static_cast<char16_t>(text[index]) == patCh) {
   3813      size_t subLength = size_t(index - lastEndIndex);
   3814      JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
   3815      if (!sub) {
   3816        return nullptr;
   3817      }
   3818      splits->initDenseElement(splitsIndex++, StringValue(sub));
   3819      lastEndIndex = index + 1;
   3820    }
   3821  }
   3822 
   3823  // Add substring for tail of string (after last match).
   3824  JSString* sub =
   3825      NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
   3826  if (!sub) {
   3827    return nullptr;
   3828  }
   3829  splits->initDenseElement(splitsIndex++, StringValue(sub));
   3830 
   3831  return splits;
   3832 }
   3833 
   3834 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
   3835 static ArrayObject* SplitSingleCharHelper(JSContext* cx,
   3836                                          Handle<JSLinearString*> str,
   3837                                          char16_t ch) {
   3838  // Step 12.
   3839  size_t strLength = str->length();
   3840 
   3841  AutoStableStringChars linearChars(cx);
   3842  if (!linearChars.init(cx, str)) {
   3843    return nullptr;
   3844  }
   3845 
   3846  if (linearChars.isLatin1()) {
   3847    return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
   3848                                 ch);
   3849  }
   3850 
   3851  return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
   3852                               ch);
   3853 }
   3854 
   3855 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
   3856 ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
   3857                                   HandleString sep, uint32_t limit) {
   3858  MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
   3859 
   3860  Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
   3861  if (!linearStr) {
   3862    return nullptr;
   3863  }
   3864 
   3865  Rooted<JSLinearString*> linearSep(cx, sep->ensureLinear(cx));
   3866  if (!linearSep) {
   3867    return nullptr;
   3868  }
   3869 
   3870  if (linearSep->length() == 0) {
   3871    return CharSplitHelper(cx, linearStr, limit);
   3872  }
   3873 
   3874  if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
   3875    char16_t ch = linearSep->latin1OrTwoByteChar(0);
   3876    return SplitSingleCharHelper(cx, linearStr, ch);
   3877  }
   3878 
   3879  return SplitHelper(cx, linearStr, limit, linearSep);
   3880 }
   3881 
   3882 static const JSFunctionSpec string_methods[] = {
           // Function table referenced from StringObject::classSpec_ (presumably
           // the String.prototype methods -- confirm against ClassSpec). Entries
           // are either C++ natives (JS_FN / JS_INLINABLE_FN) or name a
           // self-hosted implementation by string (JS_SELF_HOSTED_FN).
           // JS_FS_END terminates the list.
   3883    JS_FN("toSource", str_toSource, 0, 0),
   3884 
   3885    /* Java-like methods. */
   3886    JS_INLINABLE_FN("toString", str_toString, 0, 0, StringToString),
   3887    JS_INLINABLE_FN("valueOf", str_toString, 0, 0, StringValueOf),
   3888    JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
   3889    JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
   3890    JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
   3891    JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
   3892    JS_INLINABLE_FN("codePointAt", str_codePointAt, 1, 0, StringCodePointAt),
   3893    JS_INLINABLE_FN("at", str_at, 1, 0, StringAt),
   3894    JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
   3895    JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
   3896    JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
   3897    JS_INLINABLE_FN("includes", str_includes, 1, 0, StringIncludes),
   3898    JS_INLINABLE_FN("indexOf", str_indexOf, 1, 0, StringIndexOf),
   3899    JS_INLINABLE_FN("lastIndexOf", str_lastIndexOf, 1, 0, StringLastIndexOf),
   3900    JS_INLINABLE_FN("startsWith", str_startsWith, 1, 0, StringStartsWith),
   3901    JS_INLINABLE_FN("endsWith", str_endsWith, 1, 0, StringEndsWith),
   3902    JS_INLINABLE_FN("trim", str_trim, 0, 0, StringTrim),
   3903    JS_INLINABLE_FN("trimStart", str_trimStart, 0, 0, StringTrimStart),
   3904    JS_INLINABLE_FN("trimEnd", str_trimEnd, 0, 0, StringTrimEnd),
   3905    JS_INLINABLE_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0,
   3906                    StringToLocaleLowerCase),
   3907    JS_INLINABLE_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0,
   3908                    StringToLocaleUpperCase),
   3909    JS_FN("localeCompare", str_localeCompare, 1, 0),
   3910    JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
           // "normalize" is only registered when the Intl API is compiled in.
   3911 #if JS_HAS_INTL_API
   3912    JS_FN("normalize", str_normalize, 0, 0),
   3913 #endif
   3914 
   3915    /* Perl-ish methods (search is actually Python-esque). */
   3916    JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
   3917    JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
   3918    JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
   3919    JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
   3920    JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
   3921    JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
   3922    JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
   3923 
   3924    /* Python-esque sequence methods. */
   3925    JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
   3926    JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
   3927 
   3928    /* HTML string methods. */
   3929    JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
   3930    JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
   3931    JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
   3932    JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
   3933    JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
   3934    JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
   3935    JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
   3936    JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
   3937    JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
   3938    JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
   3939    JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
   3940    JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
   3941    JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
   3942 
           /* Registered under the well-known `iterator` symbol, not a string name. */
   3943    JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0),
   3944 
   3945    /* Well-formed Unicode string methods. */
   3946    JS_FN("isWellFormed", str_isWellFormed, 0, 0),
   3947    JS_FN("toWellFormed", str_toWellFormed, 0, 0),
   3948 
   3949    JS_FS_END,
   3950 };
   3951 
   3952 // ES6 rev 27 (2014 Aug 24) 21.1.1
   3953 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
   3954  CallArgs args = CallArgsFromVp(argc, vp);
   3955 
   3956  RootedString str(cx);
   3957  if (args.length() > 0) {
   3958    if (!args.isConstructing() && args[0].isSymbol()) {
   3959      return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
   3960    }
   3961 
   3962    str = ToString<CanGC>(cx, args[0]);
   3963    if (!str) {
   3964      return false;
   3965    }
   3966  } else {
   3967    str = cx->runtime()->emptyString;
   3968  }
   3969 
   3970  if (args.isConstructing()) {
   3971    RootedObject proto(cx);
   3972    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
   3973      return false;
   3974    }
   3975 
   3976    StringObject* strobj = StringObject::create(cx, str, proto);
   3977    if (!strobj) {
   3978      return false;
   3979    }
   3980    args.rval().setObject(*strobj);
   3981    return true;
   3982  }
   3983 
   3984  args.rval().setString(str);
   3985  return true;
   3986 }
   3987 
   3988 static inline JSLinearString* CodeUnitToString(JSContext* cx, char16_t code) {
   3989  if (StaticStrings::hasUnit(code)) {
   3990    return cx->staticStrings().getUnit(code);
   3991  }
   3992  return NewInlineString<CanGC>(cx, {code}, 1);
   3993 }
   3994 
   3995 JSLinearString* js::StringFromCharCode(JSContext* cx, int32_t charCode) {
   3996  return CodeUnitToString(cx, char16_t(charCode));
   3997 }
   3998 
   3999 JSLinearString* js::StringFromCodePoint(JSContext* cx, char32_t codePoint) {
   4000  MOZ_ASSERT(codePoint <= unicode::NonBMPMax);
   4001 
   4002  if (!unicode::IsSupplementary(codePoint)) {
   4003    return CodeUnitToString(cx, char16_t(codePoint));
   4004  }
   4005 
   4006  char16_t chars[] = {unicode::LeadSurrogate(codePoint),
   4007                      unicode::TrailSurrogate(codePoint)};
   4008  return NewInlineString<CanGC>(cx, chars, 2);
   4009 }
   4010 
   4011 // Inspect arguments to guess the output string type.
   4012 static bool GuessFromCharCodeIsLatin1(const CallArgs& args) {
   4013  // Arbitrarily chosen number of samples to inspect.
   4014  constexpr unsigned SampleSize = 8;
   4015 
   4016  for (unsigned i = 0; i < std::min(args.length(), SampleSize); i++) {
   4017    auto v = args[i];
   4018    if (v.isInt32() && uint16_t(v.toInt32()) > JSString::MAX_LATIN1_CHAR) {
   4019      return false;
   4020    }
   4021  }
   4022  return true;
   4023 }
   4024 
   4025 /**
   4026 * 22.1.2.1 String.fromCharCode ( ...codeUnits )
   4027 *
   4028 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   4029 */
   4030 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
   4031  CallArgs args = CallArgsFromVp(argc, vp);
   4032 
   4033  MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
   4034 
   4035  // Optimize the single-char case.
   4036  if (args.length() == 1) {
   4037    uint16_t code;
   4038    if (!ToUint16(cx, args[0], &code)) {
   4039      return false;
   4040    }
   4041 
   4042    JSString* str = CodeUnitToString(cx, char16_t(code));
   4043    if (!str) {
   4044      return false;
   4045    }
   4046 
   4047    args.rval().setString(str);
   4048    return true;
   4049  }
   4050 
   4051  // Optimize the case where the result will be a Latin-1 string.
   4052  StringChars<Latin1Char> latin1Chars(cx);
   4053 
   4054  unsigned i = 0;
   4055  uint16_t firstTwoByteChar = 0;
   4056  if (GuessFromCharCodeIsLatin1(args)) {
   4057    if (!latin1Chars.maybeAlloc(cx, args.length())) {
   4058      return false;
   4059    }
   4060 
   4061    for (; i < args.length(); i++) {
   4062      uint16_t code;
   4063      if (!ToUint16(cx, args[i], &code)) {
   4064        return false;
   4065      }
   4066 
   4067      if (code > JSString::MAX_LATIN1_CHAR) {
   4068        firstTwoByteChar = code;
   4069        break;
   4070      }
   4071 
   4072      AutoCheckCannotGC nogc;
   4073      latin1Chars.data(nogc)[i] = code;
   4074    }
   4075 
   4076    if (i == args.length()) {
   4077      JSString* str = latin1Chars.toStringDontDeflate<CanGC>(cx, args.length());
   4078      if (!str) {
   4079        return false;
   4080      }
   4081 
   4082      args.rval().setString(str);
   4083      return true;
   4084    }
   4085  }
   4086 
   4087  StringChars<char16_t> twoByteChars(cx);
   4088  if (!twoByteChars.maybeAlloc(cx, args.length())) {
   4089    return false;
   4090  }
   4091 
   4092  // Copy the already processed characters.
   4093  if (i > 0) {
   4094    AutoCheckCannotGC nogc;
   4095    std::copy_n(latin1Chars.data(nogc), i, twoByteChars.data(nogc));
   4096  }
   4097 
   4098  // Copy the first two-byte character, if present.
   4099  if (firstTwoByteChar > 0) {
   4100    MOZ_ASSERT(firstTwoByteChar > JSString::MAX_LATIN1_CHAR);
   4101 
   4102    AutoCheckCannotGC nogc;
   4103    twoByteChars.data(nogc)[i++] = char16_t(firstTwoByteChar);
   4104  }
   4105 
   4106  for (; i < args.length(); i++) {
   4107    uint16_t code;
   4108    if (!ToUint16(cx, args[i], &code)) {
   4109      return false;
   4110    }
   4111 
   4112    AutoCheckCannotGC nogc;
   4113    twoByteChars.data(nogc)[i] = code;
   4114  }
   4115 
   4116  JSString* str = twoByteChars.toStringDontDeflate<CanGC>(cx, args.length());
   4117  if (!str) {
   4118    return false;
   4119  }
   4120 
   4121  args.rval().setString(str);
   4122  return true;
   4123 }
   4124 
   4125 /**
   4126 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
   4127 *
   4128 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   4129 */
   4130 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
   4131                                          char32_t* codePoint) {
   4132  // String.fromCodePoint, Steps 2.a-d.
   4133 
   4134  // Fast path for the common case - the input is already an int32.
   4135  if (code.isInt32()) {
   4136    // Step 2.a.
   4137    int32_t nextCP = code.toInt32();
   4138 
   4139    // Steps 2.b-d.
   4140    if (MOZ_LIKELY(uint32_t(nextCP) <= unicode::NonBMPMax)) {
   4141      *codePoint = char32_t(nextCP);
   4142      return true;
   4143    }
   4144  }
   4145 
   4146  // Step 2.a.
   4147  double nextCP;
   4148  if (!ToNumber(cx, code, &nextCP)) {
   4149    return false;
   4150  }
   4151 
   4152  // Steps 2.b-c.
   4153  if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
   4154      nextCP > unicode::NonBMPMax) {
   4155    ToCStringBuf cbuf;
   4156    const char* numStr = NumberToCString(&cbuf, nextCP);
   4157    MOZ_ASSERT(numStr);
   4158    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
   4159                              JSMSG_NOT_A_CODEPOINT, numStr);
   4160    return false;
   4161  }
   4162 
   4163  // Steps 2.d.
   4164  *codePoint = char32_t(nextCP);
   4165  return true;
   4166 }
   4167 
   4168 /**
   4169 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
   4170 *
   4171 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   4172 */
   4173 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
   4174  MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
   4175 
   4176  // Step 1.
   4177  char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
   4178 
   4179  // Step 2.
   4180  unsigned length = 0;
   4181  for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
   4182    // Steps 2.a-c.
   4183    char32_t codePoint;
   4184    if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
   4185      return false;
   4186    }
   4187 
   4188    // Step 2.d.
   4189    unicode::UTF16Encode(codePoint, elements, &length);
   4190  }
   4191 
   4192  // Steps 3-4.
   4193  JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
   4194  if (!str) {
   4195    return false;
   4196  }
   4197 
   4198  args.rval().setString(str);
   4199  return true;
   4200 }
   4201 
   4202 /**
   4203 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
   4204 *
   4205 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
   4206 */
   4207 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
   4208  CallArgs args = CallArgsFromVp(argc, vp);
   4209 
   4210  // Optimize the single code-point case.
   4211  if (args.length() == 1) {
   4212    // Step 1. (Omitted)
   4213 
   4214    // Step 2.
   4215    char32_t codePoint;
   4216    if (!ToCodePoint(cx, args[0], &codePoint)) {
   4217      return false;
   4218    }
   4219 
   4220    // Steps 3-4.
   4221    JSString* str = StringFromCodePoint(cx, codePoint);
   4222    if (!str) {
   4223      return false;
   4224    }
   4225 
   4226    args.rval().setString(str);
   4227    return true;
   4228  }
   4229 
   4230  // Optimize the case where the result will definitely fit in an inline
   4231  // string (thin or fat) and so we don't need to malloc the chars. (We could
   4232  // cover some cases where |args.length()| goes up to
   4233  // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
   4234  // are all Latin-1, but it doesn't seem worth the effort.)
   4235  if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
   4236    return str_fromCodePoint_few_args(cx, args);
   4237  }
   4238 
   4239  // Step 1.
   4240  static_assert(
   4241      ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
   4242      "|args.length() * 2| does not overflow");
   4243  auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
   4244                                                     args.length() * 2);
   4245  if (!elements) {
   4246    return false;
   4247  }
   4248 
   4249  // Steps 2.
   4250  unsigned length = 0;
   4251  for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
   4252    // Steps 2.a-c.
   4253    char32_t codePoint;
   4254    if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
   4255      return false;
   4256    }
   4257 
   4258    // Step 2.d.
   4259    unicode::UTF16Encode(codePoint, elements.get(), &length);
   4260  }
   4261 
   4262  // Steps 3-4.
   4263  JSString* str = NewString<CanGC>(cx, std::move(elements), length);
   4264  if (!str) {
   4265    return false;
   4266  }
   4267 
   4268  args.rval().setString(str);
   4269  return true;
   4270 }
   4271 
   4272 static const JSFunctionSpec string_static_methods[] = {
           // Function table referenced from StringObject::classSpec_ (presumably
           // the methods of the String constructor itself -- confirm against
           // ClassSpec). The two natives are defined in this file; "raw" names
           // a self-hosted implementation.
   4273    JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
   4274                    StringFromCharCode),
   4275    JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
   4276                    StringFromCodePoint),
   4277 
   4278    JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0),
   4279    JS_FS_END,
   4280 };
   4281 
   4282 /* static */
   4283 SharedShape* StringObject::assignInitialShape(JSContext* cx,
   4284                                              Handle<StringObject*> obj) {
   4285  MOZ_ASSERT(obj->empty());
   4286 
   4287  if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
   4288                                               LENGTH_SLOT, {})) {
   4289    return nullptr;
   4290  }
   4291 
   4292  return obj->sharedShape();
   4293 }
   4294 
   4295 JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
   4296  Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
   4297 
   4298  // Because the `length` property of a StringObject is both non-configurable
   4299  // and non-writable, we need to take the slow path of proxy result
   4300  // validation for them, and so we need to ensure that the initial ObjectFlags
   4301  // reflect that. Normally this would be handled for us, but the special
   4302  // SharedShape::ensureInitialCustomShape path which ultimately takes us
   4303  // through StringObject::assignInitialShape which adds the problematic
   4304  // property sneaks past our flag setting logic and results in a failed
   4305  // lookup of the initial shape in SharedShape::insertInitialShape.
   4306  Rooted<StringObject*> proto(
   4307      cx, GlobalObject::createBlankPrototype<StringObject>(
   4308              cx, cx->global(),
   4309              ObjectFlags({ObjectFlag::NeedsProxyGetSetResultValidation})));
   4310  if (!proto) {
   4311    return nullptr;
   4312  }
   4313  if (!StringObject::init(cx, proto, empty)) {
   4314    return nullptr;
   4315  }
   4316  return proto;
   4317 }
   4318 
   4319 static bool StringClassFinish(JSContext* cx, HandleObject ctor,
   4320                              HandleObject proto) {
   4321  Handle<NativeObject*> nativeProto = proto.as<NativeObject>();
   4322 
   4323  // Create "trimLeft" as an alias for "trimStart".
   4324  RootedValue trimFn(cx);
   4325  RootedId trimId(cx, NameToId(cx->names().trimStart));
   4326  RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
   4327  if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
   4328      !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
   4329    return false;
   4330  }
   4331 
   4332  // Create "trimRight" as an alias for "trimEnd".
   4333  trimId = NameToId(cx->names().trimEnd);
   4334  trimAliasId = NameToId(cx->names().trimRight);
   4335  if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
   4336      !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
   4337    return false;
   4338  }
   4339 
   4340  /*
   4341   * Define escape/unescape, the URI encode/decode functions, and maybe
   4342   * uneval on the global object.
   4343   */
   4344  if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
   4345    return false;
   4346  }
   4347 
   4348  return true;
   4349 }
   4350 
   4351 const ClassSpec StringObject::classSpec_ = {
           // Hooks and function tables used to set up the String class. The
           // meaning of each positional slot is defined by ClassSpec (declared
           // elsewhere). Every non-null entry is defined earlier in this file,
           // apart from the GenericCreateConstructor instantiation.
   4352    GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
   4353                             &jit::JitInfo_String>,
   4354    StringObject::createPrototype,
   4355    string_static_methods,
   4356    nullptr,
   4357    string_methods,
   4358    nullptr,
   4359    StringClassFinish,
   4360 };
   4361 
   4362 #define ____ false
        // `____` is shorthand for `false`, so the `true` entries stand out in the
        // lookup tables that follow.
   4363 
   4364 /*
   4365 * Uri reserved chars + #:
   4366 * - 35: #
   4367 * - 36: $
   4368 * - 38: &
   4369 * - 43: +
   4370 * - 44: ,
   4371 * - 47: /
   4372 * - 58: :
   4373 * - 59: ;
   4374 * - 61: =
   4375 * - 63: ?
   4376 * - 64: @
        *
        * The table has 128 entries, one per character code 0..127, laid out ten per
        * row. The row label gives the tens digits and the column header the units
        * digit. An entry is true exactly for the codes listed above.
   4377 */
   4378 static const bool js_isUriReservedPlusPound[] = {
   4379    // clang-format off
   4380 /*       0     1     2     3     4     5     6     7     8     9  */
   4381 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4382 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4383 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4384 /*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
   4385 /*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
   4386 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
   4387 /*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
   4388 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4389 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4390 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4391 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4392 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4393 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
   4394    // clang-format on
   4395 };
   4396 
   4397 /*
   4398 * Uri unescaped chars:
   4399 * -      33: !
   4400 * -      39: '
   4401 * -      40: (
   4402 * -      41: )
   4403 * -      42: *
   4404 * -      45: -
   4405 * -      46: .
   4406 * -  48..57: 0-9
   4407 * -  65..90: A-Z
   4408 * -      95: _
   4409 * - 97..122: a-z
   4410 * -     126: ~
   4411 */
   4412 static const bool js_isUriUnescaped[] = {
   4413    // clang-format off
   4414 /*       0     1     2     3     4     5     6     7     8     9  */
   4415 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4416 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4417 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
   4418 /*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
   4419 /*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
   4420 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
   4421 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
   4422 /*  7 */ true, true, true, true, true, true, true, true, true, true,
   4423 /*  8 */ true, true, true, true, true, true, true, true, true, true,
   4424 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
   4425 /* 10 */ true, true, true, true, true, true, true, true, true, true,
   4426 /* 11 */ true, true, true, true, true, true, true, true, true, true,
   4427 /* 12 */ true, true, true, ____, ____, ____, true, ____
   4428    // clang-format on
   4429 };
   4430 
   4431 #undef ____
   4432 
   4433 static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
   4434                                          MutableHandleValue rval) {
   4435  if (!sb.empty()) {
   4436    str = sb.finishString();
   4437    if (!str) {
   4438      return false;
   4439    }
   4440  }
   4441  rval.setString(str);
   4442  return true;
   4443 }
   4444 
   4445 /*
   4446 * ECMA 3, 15.1.3 URI Handling Function Properties
   4447 *
   4448 * The following are implementations of the algorithms
   4449 * given in the ECMA specification for the hidden functions
   4450 * 'Encode' and 'Decode'.
   4451 */
   4452 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
   4453 
   4454 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
   4455 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
   4456 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
   4457 template <typename CharT>
   4458 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuilder& sb,
   4459                                            const CharT* chars, size_t length,
   4460                                            const bool* unescapedSet) {
   4461  Latin1Char hexBuf[3];
   4462  hexBuf[0] = '%';
   4463 
   4464  auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
   4465    static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
   4466 
   4467    hexBuf[1] = HexDigits[c >> 4];
   4468    hexBuf[2] = HexDigits[c & 0xf];
   4469    return sb.append(hexBuf, 3);
   4470  };
   4471 
   4472  auto appendRange = [&sb, chars, length](size_t start, size_t end) {
   4473    MOZ_ASSERT(start <= end);
   4474 
   4475    if (start < end) {
   4476      if (start == 0) {
   4477        if (!sb.reserve(length)) {
   4478          return false;
   4479        }
   4480      }
   4481      return sb.append(chars + start, chars + end);
   4482    }
   4483    return true;
   4484  };
   4485 
   4486  size_t startAppend = 0;
   4487  for (size_t k = 0; k < length; k++) {
   4488    CharT c = chars[k];
   4489    if (c < 128 &&
   4490        (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
   4491      continue;
   4492    } else {
   4493      if (!appendRange(startAppend, k)) {
   4494        return Encode_Failure;
   4495      }
   4496 
   4497      if constexpr (std::is_same_v<CharT, Latin1Char>) {
   4498        if (c < 0x80) {
   4499          if (!appendEncoded(c)) {
   4500            return Encode_Failure;
   4501          }
   4502        } else {
   4503          if (!appendEncoded(0xC0 | (c >> 6)) ||
   4504              !appendEncoded(0x80 | (c & 0x3F))) {
   4505            return Encode_Failure;
   4506          }
   4507        }
   4508      } else {
   4509        if (unicode::IsTrailSurrogate(c)) {
   4510          return Encode_BadUri;
   4511        }
   4512 
   4513        char32_t v;
   4514        if (!unicode::IsLeadSurrogate(c)) {
   4515          v = c;
   4516        } else {
   4517          k++;
   4518          if (k == length) {
   4519            return Encode_BadUri;
   4520          }
   4521 
   4522          char16_t c2 = chars[k];
   4523          if (!unicode::IsTrailSurrogate(c2)) {
   4524            return Encode_BadUri;
   4525          }
   4526 
   4527          v = unicode::UTF16Decode(c, c2);
   4528        }
   4529 
   4530        uint8_t utf8buf[4];
   4531        size_t L = OneUcs4ToUtf8Char(utf8buf, v);
   4532        for (size_t j = 0; j < L; j++) {
   4533          if (!appendEncoded(utf8buf[j])) {
   4534            return Encode_Failure;
   4535          }
   4536        }
   4537      }
   4538 
   4539      startAppend = k + 1;
   4540    }
   4541  }
   4542 
   4543  if (startAppend > 0) {
   4544    if (!appendRange(startAppend, length)) {
   4545      return Encode_Failure;
   4546    }
   4547  }
   4548 
   4549  return Encode_Success;
   4550 }
   4551 
   4552 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, Handle<JSLinearString*> str,
   4553                                     const bool* unescapedSet,
   4554                                     MutableHandleValue rval) {
   4555  size_t length = str->length();
   4556  if (length == 0) {
   4557    rval.setString(cx->runtime()->emptyString);
   4558    return true;
   4559  }
   4560 
   4561  JSStringBuilder sb(cx);
   4562 
   4563  EncodeResult res;
   4564  if (str->hasLatin1Chars()) {
   4565    AutoCheckCannotGC nogc;
   4566    res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
   4567  } else {
   4568    AutoCheckCannotGC nogc;
   4569    res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
   4570  }
   4571 
   4572  if (res == Encode_Failure) {
   4573    return false;
   4574  }
   4575 
   4576  if (res == Encode_BadUri) {
   4577    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
   4578    return false;
   4579  }
   4580 
   4581  MOZ_ASSERT(res == Encode_Success);
   4582  return TransferBufferToString(sb, str, rval);
   4583 }
   4584 
   4585 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
   4586 
   4587 template <typename CharT>
   4588 static DecodeResult Decode(StringBuilder& sb, const CharT* chars, size_t length,
   4589                           const bool* reservedSet) {
   4590  auto appendRange = [&sb, chars](size_t start, size_t end) {
   4591    MOZ_ASSERT(start <= end);
   4592 
   4593    if (start < end) {
   4594      return sb.append(chars + start, chars + end);
   4595    }
   4596    return true;
   4597  };
   4598 
   4599  size_t startAppend = 0;
   4600  for (size_t k = 0; k < length; k++) {
   4601    CharT c = chars[k];
   4602    if (c == '%') {
   4603      size_t start = k;
   4604      if ((k + 2) >= length) {
   4605        return Decode_BadUri;
   4606      }
   4607 
   4608      if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) {
   4609        return Decode_BadUri;
   4610      }
   4611 
   4612      uint32_t B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
   4613                   AsciiAlphanumericToNumber(chars[k + 2]);
   4614      k += 2;
   4615      if (B < 128) {
   4616        Latin1Char ch = Latin1Char(B);
   4617        if (reservedSet && reservedSet[ch]) {
   4618          continue;
   4619        }
   4620 
   4621        if (!appendRange(startAppend, start)) {
   4622          return Decode_Failure;
   4623        }
   4624        if (!sb.append(ch)) {
   4625          return Decode_Failure;
   4626        }
   4627      } else {
   4628        int n = 1;
   4629        while (B & (0x80 >> n)) {
   4630          n++;
   4631        }
   4632 
   4633        if (n == 1 || n > 4) {
   4634          return Decode_BadUri;
   4635        }
   4636 
   4637        uint8_t octets[4];
   4638        octets[0] = (uint8_t)B;
   4639        if (k + 3 * (n - 1) >= length) {
   4640          return Decode_BadUri;
   4641        }
   4642 
   4643        for (int j = 1; j < n; j++) {
   4644          k++;
   4645          if (chars[k] != '%') {
   4646            return Decode_BadUri;
   4647          }
   4648 
   4649          if (!IsAsciiHexDigit(chars[k + 1]) ||
   4650              !IsAsciiHexDigit(chars[k + 2])) {
   4651            return Decode_BadUri;
   4652          }
   4653 
   4654          B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
   4655              AsciiAlphanumericToNumber(chars[k + 2]);
   4656          if ((B & 0xC0) != 0x80) {
   4657            return Decode_BadUri;
   4658          }
   4659 
   4660          k += 2;
   4661          octets[j] = char(B);
   4662        }
   4663 
   4664        if (!appendRange(startAppend, start)) {
   4665          return Decode_Failure;
   4666        }
   4667 
   4668        char32_t v = JS::Utf8ToOneUcs4Char(octets, n);
   4669        MOZ_ASSERT(v >= 128);
   4670        if (v >= unicode::NonBMPMin) {
   4671          if (v > unicode::NonBMPMax) {
   4672            return Decode_BadUri;
   4673          }
   4674 
   4675          if (!sb.append(unicode::LeadSurrogate(v))) {
   4676            return Decode_Failure;
   4677          }
   4678          if (!sb.append(unicode::TrailSurrogate(v))) {
   4679            return Decode_Failure;
   4680          }
   4681        } else {
   4682          if (!sb.append(char16_t(v))) {
   4683            return Decode_Failure;
   4684          }
   4685        }
   4686      }
   4687 
   4688      startAppend = k + 1;
   4689    }
   4690  }
   4691 
   4692  if (startAppend > 0) {
   4693    if (!appendRange(startAppend, length)) {
   4694      return Decode_Failure;
   4695    }
   4696  }
   4697 
   4698  return Decode_Success;
   4699 }
   4700 
   4701 static bool Decode(JSContext* cx, Handle<JSLinearString*> str,
   4702                   const bool* reservedSet, MutableHandleValue rval) {
   4703  size_t length = str->length();
   4704  if (length == 0) {
   4705    rval.setString(cx->runtime()->emptyString);
   4706    return true;
   4707  }
   4708 
   4709  JSStringBuilder sb(cx);
   4710 
   4711  DecodeResult res;
   4712  if (str->hasLatin1Chars()) {
   4713    AutoCheckCannotGC nogc;
   4714    res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
   4715  } else {
   4716    AutoCheckCannotGC nogc;
   4717    res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
   4718  }
   4719 
   4720  if (res == Decode_Failure) {
   4721    return false;
   4722  }
   4723 
   4724  if (res == Decode_BadUri) {
   4725    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
   4726    return false;
   4727  }
   4728 
   4729  MOZ_ASSERT(res == Decode_Success);
   4730  return TransferBufferToString(sb, str, rval);
   4731 }
   4732 
   4733 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
   4734  AutoJSMethodProfilerEntry pseudoFrame(cx, "decodeURI");
   4735  CallArgs args = CallArgsFromVp(argc, vp);
   4736  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
   4737  if (!str) {
   4738    return false;
   4739  }
   4740 
   4741  return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
   4742 }
   4743 
   4744 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
   4745  AutoJSMethodProfilerEntry pseudoFrame(cx, "decodeURIComponent");
   4746  CallArgs args = CallArgsFromVp(argc, vp);
   4747  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
   4748  if (!str) {
   4749    return false;
   4750  }
   4751 
   4752  return Decode(cx, str, nullptr, args.rval());
   4753 }
   4754 
   4755 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
   4756  AutoJSMethodProfilerEntry pseudoFrame(cx, "encodeURI");
   4757  CallArgs args = CallArgsFromVp(argc, vp);
   4758  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
   4759  if (!str) {
   4760    return false;
   4761  }
   4762 
   4763  return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
   4764 }
   4765 
   4766 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
   4767  AutoJSMethodProfilerEntry pseudoFrame(cx, "encodeURIComponent");
   4768  CallArgs args = CallArgsFromVp(argc, vp);
   4769  Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0));
   4770  if (!str) {
   4771    return false;
   4772  }
   4773 
   4774  return Encode(cx, str, nullptr, args.rval());
   4775 }
   4776 
   4777 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) {
   4778  JSStringBuilder sb(cx);
   4779  EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
   4780                               length, js_isUriReservedPlusPound);
   4781  if (result == EncodeResult::Encode_Failure) {
   4782    return nullptr;
   4783  }
   4784  if (result == EncodeResult::Encode_BadUri) {
   4785    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
   4786    return nullptr;
   4787  }
   4788  if (sb.empty()) {
   4789    return NewStringCopyN<CanGC>(cx, chars, length);
   4790  }
   4791  return sb.finishString();
   4792 }
   4793 
   4794 static bool FlatStringMatchHelper(JSContext* cx, JSString* str,
   4795                                  JSString* pattern, bool* isFlat,
   4796                                  int32_t* match) {
   4797  JSLinearString* linearPattern = pattern->ensureLinear(cx);
   4798  if (!linearPattern) {
   4799    return false;
   4800  }
   4801 
   4802  static const size_t MAX_FLAT_PAT_LEN = 256;
   4803  if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
   4804      StringHasRegExpMetaChars(linearPattern)) {
   4805    *isFlat = false;
   4806    return true;
   4807  }
   4808 
   4809  *isFlat = true;
   4810  if (str->isRope()) {
   4811    if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) {
   4812      return false;
   4813    }
   4814  } else {
   4815    *match = StringMatch(&str->asLinear(), linearPattern);
   4816  }
   4817 
   4818  return true;
   4819 }
   4820 
   4821 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
   4822                                HandleString pattern, int32_t match,
   4823                                MutableHandleValue rval) {
   4824  if (match < 0) {
   4825    rval.setNull();
   4826    return true;
   4827  }
   4828 
   4829  // Get the shape for the match result object.
   4830  Rooted<SharedShape*> shape(
   4831      cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(cx));
   4832  if (!shape) {
   4833    return false;
   4834  }
   4835 
   4836  Rooted<ArrayObject*> arr(cx,
   4837                           NewDenseFullyAllocatedArrayWithShape(cx, 1, shape));
   4838  if (!arr) {
   4839    return false;
   4840  }
   4841 
   4842  // Store a Value for each pair.
   4843  arr->setDenseInitializedLength(1);
   4844  arr->initDenseElement(0, StringValue(pattern));
   4845 
   4846  // Set the |index| property.
   4847  arr->initSlot(RegExpRealm::MatchResultObjectIndexSlot, Int32Value(match));
   4848 
   4849  // Set the |input| property.
   4850  arr->initSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(str));
   4851 
   4852 #ifdef DEBUG
   4853  RootedValue test(cx);
   4854  RootedId id(cx, NameToId(cx->names().index));
   4855  if (!NativeGetProperty(cx, arr, id, &test)) {
   4856    return false;
   4857  }
   4858  MOZ_ASSERT(test == arr->getSlot(0));
   4859  id = NameToId(cx->names().input);
   4860  if (!NativeGetProperty(cx, arr, id, &test)) {
   4861    return false;
   4862  }
   4863  MOZ_ASSERT(test == arr->getSlot(1));
   4864 #endif
   4865 
   4866  rval.setObject(*arr);
   4867  return true;
   4868 }
   4869 
   4870 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
   4871  CallArgs args = CallArgsFromVp(argc, vp);
   4872  MOZ_ASSERT(args.length() == 2);
   4873  MOZ_ASSERT(args[0].isString());
   4874  MOZ_ASSERT(args[1].isString());
   4875  MOZ_ASSERT(cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact());
   4876 
   4877  RootedString str(cx, args[0].toString());
   4878  RootedString pattern(cx, args[1].toString());
   4879 
   4880  bool isFlat = false;
   4881  int32_t match = 0;
   4882  if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
   4883    return false;
   4884  }
   4885 
   4886  if (!isFlat) {
   4887    args.rval().setUndefined();
   4888    return true;
   4889  }
   4890 
   4891  return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
   4892 }
   4893 
   4894 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
   4895  CallArgs args = CallArgsFromVp(argc, vp);
   4896  MOZ_ASSERT(args.length() == 2);
   4897  MOZ_ASSERT(args[0].isString());
   4898  MOZ_ASSERT(args[1].isString());
   4899  MOZ_ASSERT(cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact());
   4900 
   4901  JSString* str = args[0].toString();
   4902  JSString* pattern = args[1].toString();
   4903 
   4904  bool isFlat = false;
   4905  int32_t match = 0;
   4906  if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
   4907    return false;
   4908  }
   4909 
   4910  if (!isFlat) {
   4911    args.rval().setInt32(-2);
   4912    return true;
   4913  }
   4914 
   4915  args.rval().setInt32(match);
   4916  return true;
   4917 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE