String.cpp (143872B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "builtin/String.h" 8 9 #include "mozilla/Attributes.h" 10 #include "mozilla/CheckedInt.h" 11 #include "mozilla/Compiler.h" 12 #if JS_HAS_INTL_API 13 # include "mozilla/intl/Locale.h" 14 # include "mozilla/intl/String.h" 15 #endif 16 #include "mozilla/Likely.h" 17 #include "mozilla/Maybe.h" 18 #include "mozilla/PodOperations.h" 19 #include "mozilla/Range.h" 20 #include "mozilla/SIMD.h" 21 #include "mozilla/TextUtils.h" 22 23 #include <algorithm> 24 #include <limits> 25 #include <string.h> 26 #include <type_traits> 27 28 #include "jsnum.h" 29 #include "jstypes.h" 30 31 #include "builtin/Array.h" 32 #if JS_HAS_INTL_API 33 # include "builtin/intl/Collator.h" 34 # include "builtin/intl/CommonFunctions.h" 35 # include "builtin/intl/FormatBuffer.h" 36 # include "builtin/intl/GlobalIntlData.h" 37 # include "builtin/intl/LocaleNegotiation.h" 38 #endif 39 #include "builtin/RegExp.h" 40 #include "gc/GC.h" 41 #include "jit/InlinableNatives.h" 42 #include "js/Conversions.h" 43 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* 44 #if !JS_HAS_INTL_API 45 # include "js/LocaleSensitive.h" 46 #endif 47 #include "js/Prefs.h" 48 #include "js/Printer.h" 49 #include "js/PropertyAndElement.h" // JS_DefineFunctions 50 #include "js/PropertySpec.h" 51 #include "js/StableStringChars.h" 52 #include "js/UniquePtr.h" 53 #include "util/StringBuilder.h" 54 #include "util/Unicode.h" 55 #include "vm/GlobalObject.h" 56 #include "vm/JSContext.h" 57 #include "vm/JSObject.h" 58 #include "vm/RegExpObject.h" 59 #include "vm/SelfHosting.h" 60 #include "vm/StaticStrings.h" 61 #include "vm/ToSource.h" // js::ValueToSource 62 63 #include 
"vm/GeckoProfiler-inl.h" 64 #include "vm/NativeObject-inl.h" 65 #include "vm/StringObject-inl.h" 66 #include "vm/StringType-inl.h" 67 68 using namespace js; 69 70 using mozilla::AsciiAlphanumericToNumber; 71 using mozilla::CheckedInt; 72 using mozilla::EnsureUtf16ValiditySpan; 73 using mozilla::IsAsciiHexDigit; 74 using mozilla::PodCopy; 75 using mozilla::RangedPtr; 76 using mozilla::SIMD; 77 using mozilla::Span; 78 using mozilla::Utf16ValidUpTo; 79 80 using JS::AutoCheckCannotGC; 81 using JS::AutoStableStringChars; 82 83 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args, 84 unsigned argno) { 85 if (argno >= args.length()) { 86 return cx->names().undefined; 87 } 88 89 JSString* str = ToString<CanGC>(cx, args[argno]); 90 if (!str) { 91 return nullptr; 92 } 93 94 return str->ensureLinear(cx); 95 } 96 97 /* 98 * Forward declarations for URI encode/decode and helper routines 99 */ 100 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp); 101 102 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp); 103 104 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp); 105 106 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp); 107 108 /* 109 * Global string methods 110 */ 111 112 /* ES5 B.2.1 */ 113 template <typename CharT> 114 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length, 115 StringChars<Latin1Char>& newChars, uint32_t* newLengthOut) { 116 // clang-format off 117 static const uint8_t shouldPassThrough[128] = { 118 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 119 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 120 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */ 121 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? 
*/ 122 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */ 123 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */ 124 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */ 125 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */ 126 }; 127 // clang-format on 128 129 /* Take a first pass and see how big the result string will need to be. */ 130 uint32_t newLength = length; 131 for (size_t i = 0; i < length; i++) { 132 char16_t ch = chars[i]; 133 if (ch < 128 && shouldPassThrough[ch]) { 134 continue; 135 } 136 137 /* 138 * newlength is incremented below by at most 5 and at this point it must 139 * be a valid string length, so this should never overflow uint32_t. 140 */ 141 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5, 142 "Adding 5 to valid string length should not overflow"); 143 144 MOZ_ASSERT(newLength <= JSString::MAX_LENGTH); 145 146 /* The character will be encoded as %XX or %uXXXX. */ 147 newLength += (ch < 256) ? 2 : 5; 148 149 if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) { 150 ReportAllocationOverflow(cx); 151 return false; 152 } 153 } 154 155 if (newLength == length) { 156 *newLengthOut = newLength; 157 return true; 158 } 159 160 if (!newChars.maybeAlloc(cx, newLength)) { 161 return false; 162 } 163 164 static const char digits[] = "0123456789ABCDEF"; 165 166 JS::AutoCheckCannotGC nogc; 167 Latin1Char* rawNewChars = newChars.data(nogc); 168 size_t i, ni; 169 for (i = 0, ni = 0; i < length; i++) { 170 char16_t ch = chars[i]; 171 if (ch < 128 && shouldPassThrough[ch]) { 172 rawNewChars[ni++] = ch; 173 } else if (ch < 256) { 174 rawNewChars[ni++] = '%'; 175 rawNewChars[ni++] = digits[ch >> 4]; 176 rawNewChars[ni++] = digits[ch & 0xF]; 177 } else { 178 rawNewChars[ni++] = '%'; 179 rawNewChars[ni++] = 'u'; 180 rawNewChars[ni++] = digits[ch >> 12]; 181 rawNewChars[ni++] = digits[(ch & 0xF00) >> 8]; 182 rawNewChars[ni++] = digits[(ch & 0xF0) >> 4]; 183 rawNewChars[ni++] = digits[ch & 0xF]; 184 } 185 } 186 MOZ_ASSERT(ni == newLength); 
187 188 *newLengthOut = newLength; 189 return true; 190 } 191 192 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) { 193 AutoJSMethodProfilerEntry pseudoFrame(cx, "escape"); 194 CallArgs args = CallArgsFromVp(argc, vp); 195 196 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 197 if (!str) { 198 return false; 199 } 200 201 StringChars<Latin1Char> newChars(cx); 202 uint32_t newLength = 0; // initialize to silence GCC warning 203 if (str->hasLatin1Chars()) { 204 AutoCheckCannotGC nogc; 205 if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars, 206 &newLength)) { 207 return false; 208 } 209 } else { 210 AutoCheckCannotGC nogc; 211 if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars, 212 &newLength)) { 213 return false; 214 } 215 } 216 217 // Return input if no characters need to be escaped. 218 if (newLength == str->length()) { 219 args.rval().setString(str); 220 return true; 221 } 222 223 JSString* res = newChars.toStringDontDeflateNonStatic<CanGC>(cx, newLength); 224 if (!res) { 225 return false; 226 } 227 228 args.rval().setString(res); 229 return true; 230 } 231 232 template <typename CharT> 233 static inline bool Unhex4(const RangedPtr<const CharT> chars, 234 char16_t* result) { 235 CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3]; 236 237 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) && 238 IsAsciiHexDigit(d))) { 239 return false; 240 } 241 242 char16_t unhex = AsciiAlphanumericToNumber(a); 243 unhex = (unhex << 4) + AsciiAlphanumericToNumber(b); 244 unhex = (unhex << 4) + AsciiAlphanumericToNumber(c); 245 unhex = (unhex << 4) + AsciiAlphanumericToNumber(d); 246 *result = unhex; 247 return true; 248 } 249 250 template <typename CharT> 251 static inline bool Unhex2(const RangedPtr<const CharT> chars, 252 char16_t* result) { 253 CharT a = chars[0], b = chars[1]; 254 255 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) { 256 return false; 257 } 258 259 *result = 
(AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b); 260 return true; 261 } 262 263 template <typename CharT> 264 static bool Unescape(StringBuilder& sb, 265 const mozilla::Range<const CharT> chars) { 266 // Step 2. 267 uint32_t length = chars.length(); 268 269 /* 270 * Note that the spec algorithm has been optimized to avoid building 271 * a string in the case where no escapes are present. 272 */ 273 bool building = false; 274 275 #define ENSURE_BUILDING \ 276 do { \ 277 if (!building) { \ 278 building = true; \ 279 if (!sb.reserve(length)) return false; \ 280 sb.infallibleAppend(chars.begin().get(), k); \ 281 } \ 282 } while (false); 283 284 // Step 4. 285 uint32_t k = 0; 286 287 // Step 5. 288 while (k < length) { 289 // Step 5.a. 290 char16_t c = chars[k]; 291 292 // Step 5.b. 293 if (c == '%') { 294 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6, 295 "String length is not near UINT32_MAX"); 296 297 // Steps 5.b.i-ii. 298 if (k + 6 <= length && chars[k + 1] == 'u') { 299 if (Unhex4(chars.begin() + k + 2, &c)) { 300 ENSURE_BUILDING 301 k += 5; 302 } 303 } else if (k + 3 <= length) { 304 if (Unhex2(chars.begin() + k + 1, &c)) { 305 ENSURE_BUILDING 306 k += 2; 307 } 308 } 309 } 310 311 // Step 5.c. 312 if (building && !sb.append(c)) { 313 return false; 314 } 315 316 // Step 5.d. 317 k += 1; 318 } 319 320 return true; 321 #undef ENSURE_BUILDING 322 } 323 324 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54 325 // B.2.1.2 unescape ( string ) 326 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) { 327 AutoJSMethodProfilerEntry pseudoFrame(cx, "unescape"); 328 CallArgs args = CallArgsFromVp(argc, vp); 329 330 // Step 1. 331 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 332 if (!str) { 333 return false; 334 } 335 336 // Step 3. 337 JSStringBuilder sb(cx); 338 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) { 339 return false; 340 } 341 342 // Steps 2, 4-5. 
343 bool unescapeFailed = false; 344 if (str->hasLatin1Chars()) { 345 AutoCheckCannotGC nogc; 346 unescapeFailed = !Unescape(sb, str->latin1Range(nogc)); 347 } else { 348 AutoCheckCannotGC nogc; 349 unescapeFailed = !Unescape(sb, str->twoByteRange(nogc)); 350 } 351 if (unescapeFailed) { 352 return false; 353 } 354 355 // Step 6. 356 JSLinearString* result; 357 if (!sb.empty()) { 358 result = sb.finishString(); 359 if (!result) { 360 return false; 361 } 362 } else { 363 result = str; 364 } 365 366 args.rval().setString(result); 367 return true; 368 } 369 370 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) { 371 CallArgs args = CallArgsFromVp(argc, vp); 372 JSString* str = ValueToSource(cx, args.get(0)); 373 if (!str) { 374 return false; 375 } 376 377 args.rval().setString(str); 378 return true; 379 } 380 381 static const JSFunctionSpec string_functions[] = { 382 JS_FN("escape", str_escape, 1, JSPROP_RESOLVING), 383 JS_FN("unescape", str_unescape, 1, JSPROP_RESOLVING), 384 JS_FN("uneval", str_uneval, 1, JSPROP_RESOLVING), 385 JS_FN("decodeURI", str_decodeURI, 1, JSPROP_RESOLVING), 386 JS_FN("encodeURI", str_encodeURI, 1, JSPROP_RESOLVING), 387 JS_FN("decodeURIComponent", str_decodeURI_Component, 1, JSPROP_RESOLVING), 388 JS_FN("encodeURIComponent", str_encodeURI_Component, 1, JSPROP_RESOLVING), 389 JS_FS_END, 390 }; 391 392 static const unsigned STRING_ELEMENT_ATTRS = 393 JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT; 394 395 static bool str_enumerate(JSContext* cx, HandleObject obj) { 396 RootedString str(cx, obj->as<StringObject>().unbox()); 397 js::StaticStrings& staticStrings = cx->staticStrings(); 398 399 RootedValue value(cx); 400 for (size_t i = 0, length = str->length(); i < length; i++) { 401 JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i); 402 if (!str1) { 403 return false; 404 } 405 value.setString(str1); 406 if (!DefineDataElement(cx, obj, i, value, 407 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) { 408 return 
false; 409 } 410 } 411 412 return true; 413 } 414 415 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) { 416 // str_resolve ignores non-integer ids. 417 return id.isInt(); 418 } 419 420 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id, 421 bool* resolvedp) { 422 if (!id.isInt()) { 423 return true; 424 } 425 426 RootedString str(cx, obj->as<StringObject>().unbox()); 427 428 int32_t slot = id.toInt(); 429 if ((size_t)slot < str->length()) { 430 JSString* str1 = 431 cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot)); 432 if (!str1) { 433 return false; 434 } 435 RootedValue value(cx, StringValue(str1)); 436 if (!DefineDataElement(cx, obj, uint32_t(slot), value, 437 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) { 438 return false; 439 } 440 *resolvedp = true; 441 } 442 return true; 443 } 444 445 static const JSClassOps StringObjectClassOps = { 446 nullptr, // addProperty 447 nullptr, // delProperty 448 str_enumerate, // enumerate 449 nullptr, // newEnumerate 450 str_resolve, // resolve 451 str_mayResolve, // mayResolve 452 nullptr, // finalize 453 nullptr, // call 454 nullptr, // construct 455 nullptr, // trace 456 }; 457 458 const JSClass StringObject::class_ = { 459 "String", 460 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) | 461 JSCLASS_HAS_CACHED_PROTO(JSProto_String), 462 &StringObjectClassOps, 463 &StringObject::classSpec_, 464 }; 465 466 /* 467 * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)| 468 * from nearly all String.prototype.* functions. 469 */ 470 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction( 471 JSContext* cx, const char* funName, HandleValue thisv) { 472 if (thisv.isString()) { 473 return thisv.toString(); 474 } 475 476 if (thisv.isObject()) { 477 if (thisv.toObject().is<StringObject>()) { 478 StringObject* nobj = &thisv.toObject().as<StringObject>(); 479 // We have to make sure that the ToPrimitive call from ToString 480 // would be unobservable. 
481 if (HasNoToPrimitiveMethodPure(nobj, cx) && 482 HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) { 483 return nobj->unbox(); 484 } 485 } 486 } else if (thisv.isNullOrUndefined()) { 487 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 488 JSMSG_INCOMPATIBLE_PROTO, "String", funName, 489 thisv.isNull() ? "null" : "undefined"); 490 return nullptr; 491 } 492 493 return ToStringSlow<CanGC>(cx, thisv); 494 } 495 496 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) { 497 return v.isString() || (v.isObject() && v.toObject().is<StringObject>()); 498 } 499 500 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) { 501 MOZ_ASSERT(IsString(args.thisv())); 502 503 JSString* str = ToString<CanGC>(cx, args.thisv()); 504 if (!str) { 505 return false; 506 } 507 508 UniqueChars quoted = QuoteString(cx, str, '"'); 509 if (!quoted) { 510 return false; 511 } 512 513 JSStringBuilder sb(cx); 514 if (!sb.append("(new String(") || 515 !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) { 516 return false; 517 } 518 519 JSString* result = sb.finishString(); 520 if (!result) { 521 return false; 522 } 523 args.rval().setString(result); 524 return true; 525 } 526 527 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) { 528 CallArgs args = CallArgsFromVp(argc, vp); 529 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args); 530 } 531 532 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) { 533 MOZ_ASSERT(IsString(args.thisv())); 534 535 args.rval().setString( 536 args.thisv().isString() 537 ? 
args.thisv().toString() 538 : args.thisv().toObject().as<StringObject>().unbox()); 539 return true; 540 } 541 542 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) { 543 CallArgs args = CallArgsFromVp(argc, vp); 544 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args); 545 } 546 547 template <typename DestChar, typename SrcChar> 548 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars, 549 size_t length) { 550 if constexpr (std::is_same_v<DestChar, SrcChar>) { 551 #if MOZ_IS_GCC 552 // Directly call memcpy to work around bug 1863131. 553 memcpy(destChars, srcChars, length * sizeof(DestChar)); 554 #else 555 PodCopy(destChars, srcChars, length); 556 #endif 557 } else { 558 for (size_t i = 0; i < length; i++) { 559 destChars[i] = srcChars[i]; 560 } 561 } 562 } 563 564 template <typename CharT> 565 static inline void CopyChars(CharT* to, const JSLinearString* from, 566 size_t begin, size_t length) { 567 MOZ_ASSERT(begin + length <= from->length()); 568 569 JS::AutoCheckCannotGC nogc; 570 if (from->hasLatin1Chars()) { 571 CopyChars(to, from->latin1Chars(nogc) + begin, length); 572 } else { 573 CopyChars(to, from->twoByteChars(nogc) + begin, length); 574 } 575 } 576 577 template <typename CharT> 578 static JSLinearString* SubstringInlineString(JSContext* cx, 579 Handle<JSLinearString*> left, 580 Handle<JSLinearString*> right, 581 size_t begin, size_t lhsLength, 582 size_t rhsLength) { 583 constexpr size_t MaxLength = std::is_same_v<CharT, Latin1Char> 584 ? 
JSFatInlineString::MAX_LENGTH_LATIN1 585 : JSFatInlineString::MAX_LENGTH_TWO_BYTE; 586 587 size_t length = lhsLength + rhsLength; 588 MOZ_ASSERT(length <= MaxLength, "total length fits in stack chars"); 589 MOZ_ASSERT(JSInlineString::lengthFits<CharT>(length)); 590 591 CharT chars[MaxLength] = {}; 592 593 CopyChars(chars, left, begin, lhsLength); 594 CopyChars(chars + lhsLength, right, 0, rhsLength); 595 596 if (auto* str = cx->staticStrings().lookup(chars, length)) { 597 return str; 598 } 599 return NewInlineString<CanGC>(cx, chars, length); 600 } 601 602 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, 603 int32_t lengthInt) { 604 MOZ_ASSERT(0 <= beginInt); 605 MOZ_ASSERT(0 <= lengthInt); 606 MOZ_ASSERT(uint32_t(beginInt) <= str->length()); 607 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt); 608 609 uint32_t begin = beginInt; 610 uint32_t len = lengthInt; 611 612 /* 613 * Optimization for one level deep ropes. 614 * This is common for the following pattern: 615 * 616 * while() { 617 * text = text.substr(0, x) + "bla" + text.substr(x) 618 * text.charCodeAt(x + 1) 619 * } 620 */ 621 if (str->isRope()) { 622 JSRope* rope = &str->asRope(); 623 624 if (rope->length() == len) { 625 // Substring is the full rope. 626 MOZ_ASSERT(begin == 0); 627 return rope; 628 } 629 630 if (begin + len <= rope->leftChild()->length()) { 631 // Substring is fully contained within the rope's left child. 632 return NewDependentString(cx, rope->leftChild(), begin, len); 633 } 634 635 if (begin >= rope->leftChild()->length()) { 636 // Substring is fully contained within the rope's right child. 637 begin -= rope->leftChild()->length(); 638 return NewDependentString(cx, rope->rightChild(), begin, len); 639 } 640 641 // The substring spans both children. Avoid flattening the rope if the 642 // children are both linear and the substring fits in an inline string. 
643 // 644 // Note: we could handle longer substrings by allocating a new rope here, 645 // but this can result in a lot more rope flattening later on. It's safer to 646 // flatten the rope in this case. See bug 1922926. 647 648 MOZ_ASSERT(begin < rope->leftChild()->length() && 649 begin + len > rope->leftChild()->length()); 650 651 bool fitsInline = rope->hasLatin1Chars() 652 ? JSInlineString::lengthFits<Latin1Char>(len) 653 : JSInlineString::lengthFits<char16_t>(len); 654 if (fitsInline && rope->leftChild()->isLinear() && 655 rope->rightChild()->isLinear()) { 656 Rooted<JSLinearString*> left(cx, &rope->leftChild()->asLinear()); 657 Rooted<JSLinearString*> right(cx, &rope->rightChild()->asLinear()); 658 659 size_t lhsLength = left->length() - begin; 660 size_t rhsLength = len - lhsLength; 661 662 if (rope->hasLatin1Chars()) { 663 return SubstringInlineString<Latin1Char>(cx, left, right, begin, 664 lhsLength, rhsLength); 665 } 666 return SubstringInlineString<char16_t>(cx, left, right, begin, lhsLength, 667 rhsLength); 668 } 669 } 670 671 return NewDependentString(cx, str, begin, len); 672 } 673 674 /** 675 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings 676 * depending on its context: 677 * When it's preceded by a cased character and not followed by another cased 678 * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA. 679 * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA. 
680 * 681 * Unicode 9.0, §3.13 Default Case Algorithms 682 */ 683 static char16_t Final_Sigma(const char16_t* chars, size_t length, 684 size_t index) { 685 MOZ_ASSERT(index < length); 686 MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA); 687 MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) == 688 unicode::GREEK_SMALL_LETTER_SIGMA); 689 690 #if JS_HAS_INTL_API 691 // Tell the analysis the BinaryProperty.contains function pointer called by 692 // mozilla::intl::String::Is{CaseIgnorable, Cased} cannot GC. 693 JS::AutoSuppressGCAnalysis nogc; 694 695 bool precededByCased = false; 696 for (size_t i = index; i > 0;) { 697 char16_t c = chars[--i]; 698 char32_t codePoint = c; 699 if (unicode::IsTrailSurrogate(c) && i > 0) { 700 char16_t lead = chars[i - 1]; 701 if (unicode::IsLeadSurrogate(lead)) { 702 codePoint = unicode::UTF16Decode(lead, c); 703 i--; 704 } 705 } 706 707 // Ignore any characters with the property Case_Ignorable. 708 // NB: We need to skip over all Case_Ignorable characters, even when 709 // they also have the Cased binary property. 710 if (mozilla::intl::String::IsCaseIgnorable(codePoint)) { 711 continue; 712 } 713 714 precededByCased = mozilla::intl::String::IsCased(codePoint); 715 break; 716 } 717 if (!precededByCased) { 718 return unicode::GREEK_SMALL_LETTER_SIGMA; 719 } 720 721 bool followedByCased = false; 722 for (size_t i = index + 1; i < length;) { 723 char16_t c = chars[i++]; 724 char32_t codePoint = c; 725 if (unicode::IsLeadSurrogate(c) && i < length) { 726 char16_t trail = chars[i]; 727 if (unicode::IsTrailSurrogate(trail)) { 728 codePoint = unicode::UTF16Decode(c, trail); 729 i++; 730 } 731 } 732 733 // Ignore any characters with the property Case_Ignorable. 734 // NB: We need to skip over all Case_Ignorable characters, even when 735 // they also have the Cased binary property. 
736 if (mozilla::intl::String::IsCaseIgnorable(codePoint)) { 737 continue; 738 } 739 740 followedByCased = mozilla::intl::String::IsCased(codePoint); 741 break; 742 } 743 if (!followedByCased) { 744 return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA; 745 } 746 #endif 747 748 return unicode::GREEK_SMALL_LETTER_SIGMA; 749 } 750 751 // If |srcLength == destLength| is true, the destination buffer was allocated 752 // with the same size as the source buffer. When we append characters which 753 // have special casing mappings, we test |srcLength == destLength| to decide 754 // if we need to back out and reallocate a sufficiently large destination 755 // buffer. Otherwise the destination buffer was allocated with the correct 756 // size to hold all lower case mapped characters, i.e. 757 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true. 758 template <typename CharT> 759 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, 760 size_t startIndex, size_t srcLength, 761 size_t destLength) { 762 MOZ_ASSERT(startIndex < srcLength); 763 MOZ_ASSERT(srcLength <= destLength); 764 if constexpr (std::is_same_v<CharT, Latin1Char>) { 765 MOZ_ASSERT(srcLength == destLength); 766 } 767 768 size_t j = startIndex; 769 for (size_t i = startIndex; i < srcLength; i++) { 770 CharT c = srcChars[i]; 771 if constexpr (!std::is_same_v<CharT, Latin1Char>) { 772 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { 773 char16_t trail = srcChars[i + 1]; 774 if (unicode::IsTrailSurrogate(trail)) { 775 trail = unicode::ToLowerCaseNonBMPTrail(c, trail); 776 destChars[j++] = c; 777 destChars[j++] = trail; 778 i++; 779 continue; 780 } 781 } 782 783 // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE 784 // lowercases to <U+0069 U+0307>. 785 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { 786 // Return if the output buffer is too small. 
787 if (srcLength == destLength) { 788 return i; 789 } 790 791 destChars[j++] = CharT('i'); 792 destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE); 793 continue; 794 } 795 796 // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to 797 // one of two codepoints depending on context. 798 if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) { 799 destChars[j++] = Final_Sigma(srcChars, srcLength, i); 800 continue; 801 } 802 } 803 804 c = unicode::ToLowerCase(c); 805 destChars[j++] = c; 806 } 807 808 MOZ_ASSERT(j == destLength); 809 return srcLength; 810 } 811 812 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex, 813 size_t length) { 814 size_t lowerLength = length; 815 for (size_t i = startIndex; i < length; i++) { 816 char16_t c = chars[i]; 817 818 // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>. 819 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { 820 lowerLength += 1; 821 } 822 } 823 return lowerLength; 824 } 825 826 template <typename CharT> 827 static JSLinearString* ToLowerCase(JSContext* cx, JSLinearString* str) { 828 // Unlike toUpperCase, toLowerCase has the nice invariant that if the 829 // input is a Latin-1 string, the output is also a Latin-1 string. 830 831 StringChars<CharT> newChars(cx); 832 833 const size_t length = str->length(); 834 size_t resultLength; 835 { 836 AutoCheckCannotGC nogc; 837 const CharT* chars = str->chars<CharT>(nogc); 838 839 // We don't need extra special casing checks in the loop below, 840 // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3 841 // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings. 
842 MOZ_ASSERT(unicode::ChangesWhenLowerCased( 843 unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE), 844 "U+0130 has a simple lower case mapping"); 845 MOZ_ASSERT( 846 unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA), 847 "U+03A3 has a simple lower case mapping"); 848 849 // One element Latin-1 strings can be directly retrieved from the 850 // static strings cache. 851 if constexpr (std::is_same_v<CharT, Latin1Char>) { 852 if (length == 1) { 853 CharT lower = unicode::ToLowerCase(chars[0]); 854 MOZ_ASSERT(StaticStrings::hasUnit(lower)); 855 856 return cx->staticStrings().getUnit(lower); 857 } 858 } 859 860 // Look for the first character that changes when lowercased. 861 size_t i = 0; 862 for (; i < length; i++) { 863 CharT c = chars[i]; 864 if constexpr (!std::is_same_v<CharT, Latin1Char>) { 865 if (unicode::IsLeadSurrogate(c) && i + 1 < length) { 866 CharT trail = chars[i + 1]; 867 if (unicode::IsTrailSurrogate(trail)) { 868 if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) { 869 break; 870 } 871 872 i++; 873 continue; 874 } 875 } 876 } 877 if (unicode::ChangesWhenLowerCased(c)) { 878 break; 879 } 880 } 881 882 // If no character needs to change, return the input string. 
883 if (i == length) { 884 return str; 885 } 886 887 resultLength = length; 888 if (!newChars.maybeAlloc(cx, resultLength)) { 889 return nullptr; 890 } 891 892 PodCopy(newChars.data(nogc), chars, i); 893 894 size_t readChars = 895 ToLowerCaseImpl(newChars.data(nogc), chars, i, length, resultLength); 896 if constexpr (!std::is_same_v<CharT, Latin1Char>) { 897 if (readChars < length) { 898 resultLength = ToLowerCaseLength(chars, readChars, length); 899 900 if (!newChars.maybeRealloc(cx, length, resultLength)) { 901 return nullptr; 902 } 903 904 MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.data(nogc), chars, 905 readChars, length, 906 resultLength)); 907 } 908 } else { 909 MOZ_ASSERT(readChars == length, 910 "Latin-1 strings don't have special lower case mappings"); 911 } 912 } 913 914 return newChars.template toStringDontDeflate<CanGC>(cx, resultLength); 915 } 916 917 JSLinearString* js::StringToLowerCase(JSContext* cx, JSString* string) { 918 JSLinearString* linear = string->ensureLinear(cx); 919 if (!linear) { 920 return nullptr; 921 } 922 923 if (linear->hasLatin1Chars()) { 924 return ToLowerCase<Latin1Char>(cx, linear); 925 } 926 return ToLowerCase<char16_t>(cx, linear); 927 } 928 929 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) { 930 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toLowerCase"); 931 CallArgs args = CallArgsFromVp(argc, vp); 932 933 JSString* str = ToStringForStringFunction(cx, "toLowerCase", args.thisv()); 934 if (!str) { 935 return false; 936 } 937 938 JSString* result = StringToLowerCase(cx, str); 939 if (!result) { 940 return false; 941 } 942 943 args.rval().setString(result); 944 return true; 945 } 946 947 #if JS_HAS_INTL_API 948 // Lithuanian, Turkish, and Azeri have language dependent case mappings. 
949 static constexpr char LanguagesWithSpecialCasing[][3] = {"lt", "tr", "az"}; 950 951 bool js::LocaleHasDefaultCaseMapping(const char* locale) { 952 MOZ_ASSERT(locale); 953 954 size_t languageSubtagLength; 955 if (auto* sep = strchr(locale, '-')) { 956 languageSubtagLength = sep - locale; 957 } else { 958 languageSubtagLength = std::strlen(locale); 959 } 960 961 // Invalid locale identifiers default to the last-ditch locale "en-GB", which 962 // has default case mapping. 963 mozilla::Span<const char> span{locale, languageSubtagLength}; 964 { 965 // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC. 966 JS::AutoSuppressGCAnalysis nogc; 967 if (!mozilla::intl::IsStructurallyValidLanguageTag(span)) { 968 return true; 969 } 970 } 971 972 mozilla::intl::LanguageSubtag subtag{span}; 973 974 // Canonical case for the language subtag is lower-case 975 { 976 // Tell the analysis the |ToLowerCase| function can't GC. 977 JS::AutoSuppressGCAnalysis nogc; 978 979 subtag.ToLowerCase(); 980 } 981 982 // Replace outdated language subtags. Skips complex language mappings, which 983 // is okay because none of the languages with special casing are affected by 984 // complex language mapping. 985 { 986 // Tell the analysis the |LanguageMapping| function can't GC. 987 JS::AutoSuppressGCAnalysis nogc; 988 989 (void)mozilla::intl::Locale::LanguageMapping(subtag); 990 } 991 992 // Check for languages which don't use the default case mapping algorithm. 993 for (const auto& language : LanguagesWithSpecialCasing) { 994 if (subtag.EqualTo(language)) { 995 return false; 996 } 997 } 998 999 // Simple locale with default case mapping. (Or an invalid locale which 1000 // defaults to the last-ditch locale "en-GB".) 
1001 return true; 1002 } 1003 1004 static const char* CaseMappingLocale(JSLinearString* locale) { 1005 MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag"); 1006 1007 // All strings in |languagesWithSpecialCasing| are of length two, so we 1008 // only need to compare the first two characters to find a matching locale. 1009 // ES2017 Intl, §9.2.2 BestAvailableLocale 1010 if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') { 1011 for (const auto& language : LanguagesWithSpecialCasing) { 1012 if (locale->latin1OrTwoByteChar(0) == language[0] && 1013 locale->latin1OrTwoByteChar(1) == language[1]) { 1014 return language; 1015 } 1016 } 1017 } 1018 1019 return nullptr; 1020 } 1021 1022 enum class TargetCase { Lower, Upper }; 1023 1024 /** 1025 * TransformCase ( S, locales, targetCase ) 1026 */ 1027 static JSLinearString* TransformCase(JSContext* cx, Handle<JSString*> string, 1028 Handle<Value> locales, 1029 TargetCase targetCase) { 1030 // Step 1. 1031 Rooted<intl::LocalesList> requestedLocales(cx, cx); 1032 if (!intl::CanonicalizeLocaleList(cx, locales, &requestedLocales)) { 1033 return nullptr; 1034 } 1035 1036 // Trivial case: When the input is empty, directly return the empty string. 1037 if (string->empty()) { 1038 return cx->emptyString(); 1039 } 1040 1041 // Steps 2-3. 1042 Rooted<JSLinearString*> requestedLocale(cx); 1043 if (!requestedLocales.empty()) { 1044 requestedLocale = requestedLocales[0]; 1045 } else { 1046 requestedLocale = cx->global()->globalIntlData().defaultLocale(cx); 1047 if (!requestedLocale) { 1048 return nullptr; 1049 } 1050 } 1051 1052 // Steps 4-10. 1053 const char* locale = CaseMappingLocale(requestedLocale); 1054 if (!locale) { 1055 // Call the default case conversion methods for language independent casing. 1056 return targetCase == TargetCase::Lower ? 
StringToLowerCase(cx, string) 1057 : StringToUpperCase(cx, string); 1058 } 1059 1060 AutoStableStringChars inputChars(cx); 1061 if (!inputChars.initTwoByte(cx, string)) { 1062 return nullptr; 1063 } 1064 mozilla::Range<const char16_t> input = inputChars.twoByteRange(); 1065 1066 // Note: maximum case mapping length is three characters, so the result 1067 // length might be > INT32_MAX. ICU will fail in this case. 1068 static_assert(JSString::MAX_LENGTH <= INT32_MAX, 1069 "String length must fit in int32_t for ICU"); 1070 1071 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE; 1072 1073 intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx); 1074 1075 auto ok = 1076 targetCase == TargetCase::Lower 1077 ? mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer) 1078 : mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer); 1079 if (ok.isErr()) { 1080 intl::ReportInternalError(cx, ok.unwrapErr()); 1081 return nullptr; 1082 } 1083 1084 return buffer.toString(cx); 1085 } 1086 #endif 1087 1088 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { 1089 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", 1090 "toLocaleLowerCase"); 1091 CallArgs args = CallArgsFromVp(argc, vp); 1092 1093 // Steps 1-2. 1094 Rooted<JSString*> str( 1095 cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv())); 1096 if (!str) { 1097 return false; 1098 } 1099 1100 #if JS_HAS_INTL_API 1101 // Step 3. 1102 auto* result = TransformCase(cx, str, args.get(0), TargetCase::Lower); 1103 if (!result) { 1104 return false; 1105 } 1106 1107 args.rval().setString(result); 1108 return true; 1109 #else 1110 /* 1111 * Forcefully ignore the first (or any) argument and return toLowerCase(), 1112 * ECMA has reserved that argument, presumably for defining the locale. 
1113 */ 1114 if (cx->runtime()->localeCallbacks && 1115 cx->runtime()->localeCallbacks->localeToLowerCase) { 1116 Rooted<Value> result(cx); 1117 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) { 1118 return false; 1119 } 1120 1121 args.rval().set(result); 1122 return true; 1123 } 1124 1125 Rooted<JSLinearString*> linear(cx, str->ensureLinear(cx)); 1126 if (!linear) { 1127 return false; 1128 } 1129 1130 JSString* result = StringToLowerCase(cx, linear); 1131 if (!result) { 1132 return false; 1133 } 1134 1135 args.rval().setString(result); 1136 return true; 1137 #endif 1138 } 1139 1140 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) { 1141 // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with 1142 // special casing rules, so detect it inline. 1143 bool hasUpperCaseSpecialCasing = 1144 charCode == unicode::LATIN_SMALL_LETTER_SHARP_S; 1145 MOZ_ASSERT(hasUpperCaseSpecialCasing == 1146 unicode::ChangesWhenUpperCasedSpecialCasing(charCode)); 1147 1148 return hasUpperCaseSpecialCasing; 1149 } 1150 1151 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) { 1152 return unicode::ChangesWhenUpperCasedSpecialCasing(charCode); 1153 } 1154 1155 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) { 1156 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. 1157 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); 1158 1159 return 2; 1160 } 1161 1162 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) { 1163 MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode)); 1164 1165 return unicode::LengthUpperCaseSpecialCasing(charCode); 1166 } 1167 1168 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, 1169 Latin1Char* elements, 1170 size_t* index) { 1171 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. 
1172 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); 1173 static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character"); 1174 1175 elements[(*index)++] = 'S'; 1176 elements[(*index)++] = 'S'; 1177 } 1178 1179 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode, 1180 char16_t* elements, 1181 size_t* index) { 1182 unicode::AppendUpperCaseSpecialCasing(charCode, elements, index); 1183 } 1184 1185 // See ToLowerCaseImpl for an explanation of the parameters. 1186 template <typename DestChar, typename SrcChar> 1187 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, 1188 size_t startIndex, size_t srcLength, 1189 size_t destLength) { 1190 static_assert(std::is_same_v<SrcChar, Latin1Char> || 1191 !std::is_same_v<DestChar, Latin1Char>, 1192 "cannot write non-Latin-1 characters into Latin-1 string"); 1193 MOZ_ASSERT(startIndex < srcLength); 1194 MOZ_ASSERT(srcLength <= destLength); 1195 1196 size_t j = startIndex; 1197 for (size_t i = startIndex; i < srcLength; i++) { 1198 char16_t c = srcChars[i]; 1199 if constexpr (!std::is_same_v<DestChar, Latin1Char>) { 1200 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { 1201 char16_t trail = srcChars[i + 1]; 1202 if (unicode::IsTrailSurrogate(trail)) { 1203 trail = unicode::ToUpperCaseNonBMPTrail(c, trail); 1204 destChars[j++] = c; 1205 destChars[j++] = trail; 1206 i++; 1207 continue; 1208 } 1209 } 1210 } 1211 1212 if (MOZ_UNLIKELY(c > 0x7f && 1213 ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) { 1214 // Return if the output buffer is too small. 
1215 if (srcLength == destLength) { 1216 return i; 1217 } 1218 1219 ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j); 1220 continue; 1221 } 1222 1223 c = unicode::ToUpperCase(c); 1224 if constexpr (std::is_same_v<DestChar, Latin1Char>) { 1225 MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR); 1226 } 1227 destChars[j++] = c; 1228 } 1229 1230 MOZ_ASSERT(j == destLength); 1231 return srcLength; 1232 } 1233 1234 template <typename CharT> 1235 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex, 1236 size_t length) { 1237 size_t upperLength = length; 1238 for (size_t i = startIndex; i < length; i++) { 1239 char16_t c = chars[i]; 1240 1241 if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) { 1242 upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1; 1243 } 1244 } 1245 return upperLength; 1246 } 1247 1248 template <typename DestChar, typename SrcChar> 1249 static inline bool ToUpperCase(JSContext* cx, StringChars<DestChar>& newChars, 1250 const SrcChar* chars, size_t startIndex, 1251 size_t length, size_t* resultLength) { 1252 MOZ_ASSERT(startIndex < length); 1253 1254 AutoCheckCannotGC nogc; 1255 1256 *resultLength = length; 1257 if (!newChars.maybeAlloc(cx, length)) { 1258 return false; 1259 } 1260 1261 CopyChars(newChars.data(nogc), chars, startIndex); 1262 1263 size_t readChars = 1264 ToUpperCaseImpl(newChars.data(nogc), chars, startIndex, length, length); 1265 if (readChars < length) { 1266 size_t actualLength = ToUpperCaseLength(chars, readChars, length); 1267 1268 *resultLength = actualLength; 1269 if (!newChars.maybeRealloc(cx, length, actualLength)) { 1270 return false; 1271 } 1272 1273 MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.data(nogc), chars, 1274 readChars, length, actualLength)); 1275 } 1276 1277 return true; 1278 } 1279 1280 template <typename CharT> 1281 static JSLinearString* ToUpperCase(JSContext* cx, JSLinearString* str) { 1282 using Latin1StringChars = StringChars<Latin1Char>; 1283 
using TwoByteStringChars = StringChars<char16_t>; 1284 1285 mozilla::MaybeOneOf<Latin1StringChars, TwoByteStringChars> newChars; 1286 const size_t length = str->length(); 1287 size_t resultLength; 1288 { 1289 AutoCheckCannotGC nogc; 1290 const CharT* chars = str->chars<CharT>(nogc); 1291 1292 // Most one element Latin-1 strings can be directly retrieved from the 1293 // static strings cache. 1294 if constexpr (std::is_same_v<CharT, Latin1Char>) { 1295 if (length == 1) { 1296 Latin1Char c = chars[0]; 1297 if (c != unicode::MICRO_SIGN && 1298 c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS && 1299 c != unicode::LATIN_SMALL_LETTER_SHARP_S) { 1300 char16_t upper = unicode::ToUpperCase(c); 1301 MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR); 1302 MOZ_ASSERT(StaticStrings::hasUnit(upper)); 1303 1304 return cx->staticStrings().getUnit(upper); 1305 } 1306 1307 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR || 1308 ToUpperCaseHasSpecialCasing(c)); 1309 } 1310 } 1311 1312 // Look for the first character that changes when uppercased. 1313 size_t i = 0; 1314 for (; i < length; i++) { 1315 CharT c = chars[i]; 1316 if constexpr (!std::is_same_v<CharT, Latin1Char>) { 1317 if (unicode::IsLeadSurrogate(c) && i + 1 < length) { 1318 CharT trail = chars[i + 1]; 1319 if (unicode::IsTrailSurrogate(trail)) { 1320 if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) { 1321 break; 1322 } 1323 1324 i++; 1325 continue; 1326 } 1327 } 1328 } 1329 if (unicode::ChangesWhenUpperCased(c)) { 1330 break; 1331 } 1332 if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) { 1333 break; 1334 } 1335 } 1336 1337 // If no character needs to change, return the input string. 1338 if (i == length) { 1339 return str; 1340 } 1341 1342 // The string changes when uppercased, so we must create a new string. 1343 // Can it be Latin-1? 
1344 // 1345 // If the original string is Latin-1, it can -- unless the string 1346 // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS, 1347 // the only Latin-1 codepoints that don't uppercase within Latin-1. 1348 // Search for those codepoints to decide whether the new string can be 1349 // Latin-1. 1350 // If the original string is a two-byte string, its uppercase form is 1351 // so rarely Latin-1 that we don't even consider creating a new 1352 // Latin-1 string. 1353 if constexpr (std::is_same_v<CharT, Latin1Char>) { 1354 bool resultIsLatin1 = std::none_of(chars + i, chars + length, [](auto c) { 1355 bool upperCaseIsTwoByte = 1356 c == unicode::MICRO_SIGN || 1357 c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS; 1358 MOZ_ASSERT(upperCaseIsTwoByte == 1359 (unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR)); 1360 return upperCaseIsTwoByte; 1361 }); 1362 1363 if (resultIsLatin1) { 1364 newChars.construct<Latin1StringChars>(cx); 1365 1366 if (!ToUpperCase(cx, newChars.ref<Latin1StringChars>(), chars, i, 1367 length, &resultLength)) { 1368 return nullptr; 1369 } 1370 } else { 1371 newChars.construct<TwoByteStringChars>(cx); 1372 1373 if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i, 1374 length, &resultLength)) { 1375 return nullptr; 1376 } 1377 } 1378 } else { 1379 newChars.construct<TwoByteStringChars>(cx); 1380 1381 if (!ToUpperCase(cx, newChars.ref<TwoByteStringChars>(), chars, i, length, 1382 &resultLength)) { 1383 return nullptr; 1384 } 1385 } 1386 } 1387 1388 auto toString = [&](auto& chars) { 1389 return chars.template toStringDontDeflate<CanGC>(cx, resultLength); 1390 }; 1391 1392 return newChars.mapNonEmpty(toString); 1393 } 1394 1395 JSLinearString* js::StringToUpperCase(JSContext* cx, JSString* string) { 1396 JSLinearString* linear = string->ensureLinear(cx); 1397 if (!linear) { 1398 return nullptr; 1399 } 1400 1401 if (linear->hasLatin1Chars()) { 1402 return ToUpperCase<Latin1Char>(cx, linear); 1403 } 1404 return 
ToUpperCase<char16_t>(cx, linear); 1405 } 1406 1407 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) { 1408 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toUpperCase"); 1409 CallArgs args = CallArgsFromVp(argc, vp); 1410 1411 JSString* str = ToStringForStringFunction(cx, "toUpperCase", args.thisv()); 1412 if (!str) { 1413 return false; 1414 } 1415 1416 JSString* result = StringToUpperCase(cx, str); 1417 if (!result) { 1418 return false; 1419 } 1420 1421 args.rval().setString(result); 1422 return true; 1423 } 1424 1425 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { 1426 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", 1427 "toLocaleUpperCase"); 1428 CallArgs args = CallArgsFromVp(argc, vp); 1429 1430 Rooted<JSString*> str( 1431 cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv())); 1432 if (!str) { 1433 return false; 1434 } 1435 1436 #if JS_HAS_INTL_API 1437 // Step 3. 1438 auto* result = TransformCase(cx, str, args.get(0), TargetCase::Upper); 1439 if (!result) { 1440 return false; 1441 } 1442 1443 args.rval().setString(result); 1444 return true; 1445 #else 1446 /* 1447 * Forcefully ignore the first (or any) argument and return toUpperCase(), 1448 * ECMA has reserved that argument, presumably for defining the locale. 
1449 */ 1450 if (cx->runtime()->localeCallbacks && 1451 cx->runtime()->localeCallbacks->localeToUpperCase) { 1452 Rooted<Value> result(cx); 1453 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) { 1454 return false; 1455 } 1456 1457 args.rval().set(result); 1458 return true; 1459 } 1460 1461 Rooted<JSLinearString*> linear(cx, str->ensureLinear(cx)); 1462 if (!linear) { 1463 return false; 1464 } 1465 1466 JSString* result = StringToUpperCase(cx, linear); 1467 if (!result) { 1468 return false; 1469 } 1470 1471 args.rval().setString(result); 1472 return true; 1473 #endif 1474 } 1475 1476 /** 1477 * String.prototype.localeCompare ( that [ , reserved1 [ , reserved2 ] ] ) 1478 * 1479 * ES2025 draft rev 76814cbd5d7842c2a99d28e6e8c7833f1de5bee0 1480 * 1481 * String.prototype.localeCompare ( that [ , locales [ , options ] ] ) 1482 * 1483 * ES2025 Intl draft rev 6827e6e40b45fb313472595be31352451a2d85fa 1484 */ 1485 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) { 1486 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", 1487 "localeCompare"); 1488 CallArgs args = CallArgsFromVp(argc, vp); 1489 1490 // Steps 1-2. 1491 RootedString str( 1492 cx, ToStringForStringFunction(cx, "localeCompare", args.thisv())); 1493 if (!str) { 1494 return false; 1495 } 1496 1497 // Step 3. 1498 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0))); 1499 if (!thatStr) { 1500 return false; 1501 } 1502 1503 #if JS_HAS_INTL_API 1504 HandleValue locales = args.get(1); 1505 HandleValue options = args.get(2); 1506 1507 // Step 4. 1508 Rooted<CollatorObject*> collator( 1509 cx, intl::GetOrCreateCollator(cx, locales, options)); 1510 if (!collator) { 1511 return false; 1512 } 1513 1514 // Step 5. 1515 return intl::CompareStrings(cx, collator, str, thatStr, args.rval()); 1516 #else 1517 // Delegate to JSLocaleCallbacks when Intl functionality is not exposed. 
1518 if (cx->runtime()->localeCallbacks && 1519 cx->runtime()->localeCallbacks->localeCompare) { 1520 RootedValue result(cx); 1521 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, 1522 &result)) { 1523 return false; 1524 } 1525 1526 args.rval().set(result); 1527 return true; 1528 } 1529 1530 int32_t result; 1531 if (!CompareStrings(cx, str, thatStr, &result)) { 1532 return false; 1533 } 1534 1535 args.rval().setInt32(result); 1536 return true; 1537 #endif // JS_HAS_INTL_API 1538 } 1539 1540 #if JS_HAS_INTL_API 1541 1542 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de 1543 // 21.1.3.12 String.prototype.normalize ( [ form ] ) 1544 // 1545 // String.prototype.normalize is only implementable if ICU's normalization 1546 // functionality is available. 1547 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) { 1548 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "normalize"); 1549 CallArgs args = CallArgsFromVp(argc, vp); 1550 1551 // Steps 1-2. 1552 RootedString str(cx, 1553 ToStringForStringFunction(cx, "normalize", args.thisv())); 1554 if (!str) { 1555 return false; 1556 } 1557 1558 using NormalizationForm = mozilla::intl::String::NormalizationForm; 1559 1560 NormalizationForm form; 1561 if (!args.hasDefined(0)) { 1562 // Step 3. 1563 form = NormalizationForm::NFC; 1564 } else { 1565 // Step 4. 1566 JSLinearString* formStr = ArgToLinearString(cx, args, 0); 1567 if (!formStr) { 1568 return false; 1569 } 1570 1571 // Step 5. 
1572 if (EqualStrings(formStr, cx->names().NFC)) { 1573 form = NormalizationForm::NFC; 1574 } else if (EqualStrings(formStr, cx->names().NFD)) { 1575 form = NormalizationForm::NFD; 1576 } else if (EqualStrings(formStr, cx->names().NFKC)) { 1577 form = NormalizationForm::NFKC; 1578 } else if (EqualStrings(formStr, cx->names().NFKD)) { 1579 form = NormalizationForm::NFKD; 1580 } else { 1581 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 1582 JSMSG_INVALID_NORMALIZE_FORM); 1583 return false; 1584 } 1585 } 1586 1587 // Latin-1 strings are already in Normalization Form C. 1588 if (form == NormalizationForm::NFC && str->hasLatin1Chars()) { 1589 // Step 7. 1590 args.rval().setString(str); 1591 return true; 1592 } 1593 1594 // Step 6. 1595 AutoStableStringChars stableChars(cx); 1596 if (!stableChars.initTwoByte(cx, str)) { 1597 return false; 1598 } 1599 1600 mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange(); 1601 1602 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE; 1603 1604 intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx); 1605 1606 auto alreadyNormalized = 1607 mozilla::intl::String::Normalize(form, srcChars, buffer); 1608 if (alreadyNormalized.isErr()) { 1609 intl::ReportInternalError(cx, alreadyNormalized.unwrapErr()); 1610 return false; 1611 } 1612 1613 using AlreadyNormalized = mozilla::intl::String::AlreadyNormalized; 1614 1615 // Return if the input string is already normalized. 1616 if (alreadyNormalized.unwrap() == AlreadyNormalized::Yes) { 1617 // Step 7. 1618 args.rval().setString(str); 1619 return true; 1620 } 1621 1622 JSString* ns = buffer.toString(cx); 1623 if (!ns) { 1624 return false; 1625 } 1626 1627 // Step 7. 
1628 args.rval().setString(ns); 1629 return true; 1630 } 1631 1632 #endif // JS_HAS_INTL_API 1633 1634 /** 1635 * IsStringWellFormedUnicode ( string ) 1636 * https://tc39.es/ecma262/#sec-isstringwellformedunicode 1637 */ 1638 static bool IsStringWellFormedUnicode(JSContext* cx, JSString* str, 1639 size_t* isWellFormedUpTo) { 1640 MOZ_ASSERT(isWellFormedUpTo); 1641 *isWellFormedUpTo = 0; 1642 1643 AutoCheckCannotGC nogc; 1644 1645 size_t len = str->length(); 1646 1647 // Latin1 chars are well-formed. 1648 if (str->hasLatin1Chars()) { 1649 *isWellFormedUpTo = len; 1650 return true; 1651 } 1652 1653 JSLinearString* linear = str->ensureLinear(cx); 1654 if (!linear) { 1655 return false; 1656 } 1657 1658 *isWellFormedUpTo = Utf16ValidUpTo(Span{linear->twoByteChars(nogc), len}); 1659 return true; 1660 } 1661 1662 /** 1663 * Well-Formed Unicode Strings (Stage 3 proposal) 1664 * 1665 * String.prototype.isWellFormed 1666 * https://tc39.es/proposal-is-usv-string/#sec-string.prototype.iswellformed 1667 */ 1668 static bool str_isWellFormed(JSContext* cx, unsigned argc, Value* vp) { 1669 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "isWellFormed"); 1670 CallArgs args = CallArgsFromVp(argc, vp); 1671 1672 // Step 1. Let O be ? RequireObjectCoercible(this value). 1673 // Step 2. Let S be ? ToString(O). 1674 JSString* str = ToStringForStringFunction(cx, "isWellFormed", args.thisv()); 1675 if (!str) { 1676 return false; 1677 } 1678 1679 // Step 3. Return IsStringWellFormedUnicode(S). 
1680 size_t isWellFormedUpTo; 1681 if (!IsStringWellFormedUnicode(cx, str, &isWellFormedUpTo)) { 1682 return false; 1683 } 1684 MOZ_ASSERT(isWellFormedUpTo <= str->length()); 1685 1686 args.rval().setBoolean(isWellFormedUpTo == str->length()); 1687 return true; 1688 } 1689 1690 /** 1691 * Well-Formed Unicode Strings (Stage 3 proposal) 1692 * 1693 * String.prototype.toWellFormed 1694 * https://tc39.es/proposal-is-usv-string/#sec-string.prototype.towellformed 1695 */ 1696 static bool str_toWellFormed(JSContext* cx, unsigned argc, Value* vp) { 1697 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toWellFormed"); 1698 CallArgs args = CallArgsFromVp(argc, vp); 1699 1700 // Step 1. Let O be ? RequireObjectCoercible(this value). 1701 // Step 2. Let S be ? ToString(O). 1702 RootedString str(cx, 1703 ToStringForStringFunction(cx, "toWellFormed", args.thisv())); 1704 if (!str) { 1705 return false; 1706 } 1707 1708 // Step 3. Let strLen be the length of S. 1709 size_t len = str->length(); 1710 1711 // If the string itself is well-formed, return it. 1712 size_t isWellFormedUpTo; 1713 if (!IsStringWellFormedUnicode(cx, str, &isWellFormedUpTo)) { 1714 return false; 1715 } 1716 if (isWellFormedUpTo == len) { 1717 args.rval().setString(str); 1718 return true; 1719 } 1720 MOZ_ASSERT(isWellFormedUpTo < len); 1721 1722 // Step 4-6 1723 StringChars<char16_t> newChars(cx); 1724 if (!newChars.maybeAlloc(cx, len)) { 1725 return false; 1726 } 1727 1728 { 1729 AutoCheckCannotGC nogc; 1730 1731 JSLinearString* linear = str->ensureLinear(cx); 1732 MOZ_ASSERT(linear, "IsStringWellFormedUnicode linearized the string"); 1733 1734 PodCopy(newChars.data(nogc), linear->twoByteChars(nogc), len); 1735 1736 auto span = mozilla::Span{newChars.data(nogc), len}; 1737 1738 // Replace the character. 1739 span[isWellFormedUpTo] = unicode::REPLACEMENT_CHARACTER; 1740 1741 // Check any remaining characters. 
1742 auto remaining = span.From(isWellFormedUpTo + 1); 1743 if (!remaining.IsEmpty()) { 1744 EnsureUtf16ValiditySpan(remaining); 1745 } 1746 } 1747 1748 JSString* result = newChars.toStringDontDeflateNonStatic<CanGC>(cx, len); 1749 if (!result) { 1750 return false; 1751 } 1752 1753 // Step 7. Return result. 1754 args.rval().setString(result); 1755 return true; 1756 } 1757 1758 // Clamp |value| to a string index between 0 and |length|. 1759 static MOZ_ALWAYS_INLINE bool ToClampedStringIndex(JSContext* cx, 1760 Handle<Value> value, 1761 uint32_t length, 1762 uint32_t* result) { 1763 // Handle the common case of int32 indices first. 1764 if (value.isInt32()) { 1765 int32_t i = value.toInt32(); 1766 *result = std::min(uint32_t(std::max(i, 0)), length); 1767 return true; 1768 } 1769 1770 double d; 1771 if (!ToInteger(cx, value, &d)) { 1772 return false; 1773 } 1774 *result = uint32_t(std::clamp(d, 0.0, double(length))); 1775 return true; 1776 } 1777 1778 // Return |Some(index)| if |value| is a string index between 0 and |length|. 1779 // Otherwise return |Nothing|. 1780 static MOZ_ALWAYS_INLINE bool ToStringIndex(JSContext* cx, Handle<Value> value, 1781 size_t length, 1782 mozilla::Maybe<size_t>* result) { 1783 // Handle the common case of int32 indices first. 1784 if (MOZ_LIKELY(value.isInt32())) { 1785 size_t index = size_t(value.toInt32()); 1786 if (index < length) { 1787 *result = mozilla::Some(index); 1788 } 1789 return true; 1790 } 1791 1792 double index = 0.0; 1793 if (!ToInteger(cx, value, &index)) { 1794 return false; 1795 } 1796 if (0 <= index && index < length) { 1797 *result = mozilla::Some(size_t(index)); 1798 } 1799 return true; 1800 } 1801 1802 // Return |Some(index)| if |value| is a relative string index between 0 and 1803 // |length|. Otherwise return |Nothing|. 
1804 static MOZ_ALWAYS_INLINE bool ToRelativeStringIndex( 1805 JSContext* cx, Handle<Value> value, size_t length, 1806 mozilla::Maybe<size_t>* result) { 1807 // Handle the common case of int32 indices first. 1808 if (MOZ_LIKELY(value.isInt32())) { 1809 int32_t index = value.toInt32(); 1810 if (index < 0) { 1811 index += int32_t(length); 1812 } 1813 if (size_t(index) < length) { 1814 *result = mozilla::Some(size_t(index)); 1815 } 1816 return true; 1817 } 1818 1819 double index = 0.0; 1820 if (!ToInteger(cx, value, &index)) { 1821 return false; 1822 } 1823 if (index < 0) { 1824 index += length; 1825 } 1826 if (0 <= index && index < length) { 1827 *result = mozilla::Some(size_t(index)); 1828 } 1829 return true; 1830 } 1831 1832 /** 1833 * 22.1.3.2 String.prototype.charAt ( pos ) 1834 * 1835 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35 1836 */ 1837 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) { 1838 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "charAt"); 1839 CallArgs args = CallArgsFromVp(argc, vp); 1840 1841 // Steps 1-2. 1842 RootedString str(cx, ToStringForStringFunction(cx, "charAt", args.thisv())); 1843 if (!str) { 1844 return false; 1845 } 1846 1847 // Step 3. 1848 mozilla::Maybe<size_t> index{}; 1849 if (!ToStringIndex(cx, args.get(0), str->length(), &index)) { 1850 return false; 1851 } 1852 1853 // Steps 4-5. 1854 if (index.isNothing()) { 1855 args.rval().setString(cx->runtime()->emptyString); 1856 return true; 1857 } 1858 MOZ_ASSERT(*index < str->length()); 1859 1860 // Step 6. 
1861 auto* result = cx->staticStrings().getUnitStringForElement(cx, str, *index); 1862 if (!result) { 1863 return false; 1864 } 1865 args.rval().setString(result); 1866 return true; 1867 } 1868 1869 /** 1870 * 22.1.3.3 String.prototype.charCodeAt ( pos ) 1871 * 1872 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35 1873 */ 1874 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) { 1875 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "charCodeAt"); 1876 CallArgs args = CallArgsFromVp(argc, vp); 1877 1878 // Steps 1-2. 1879 RootedString str(cx, 1880 ToStringForStringFunction(cx, "charCodeAt", args.thisv())); 1881 if (!str) { 1882 return false; 1883 } 1884 1885 // Step 3. 1886 mozilla::Maybe<size_t> index{}; 1887 if (!ToStringIndex(cx, args.get(0), str->length(), &index)) { 1888 return false; 1889 } 1890 1891 // Steps 4-5. 1892 if (index.isNothing()) { 1893 args.rval().setNaN(); 1894 return true; 1895 } 1896 MOZ_ASSERT(*index < str->length()); 1897 1898 // Step 6. 1899 char16_t c; 1900 if (!str->getChar(cx, *index, &c)) { 1901 return false; 1902 } 1903 args.rval().setInt32(c); 1904 return true; 1905 } 1906 1907 /** 1908 * 22.1.3.4 String.prototype.codePointAt ( pos ) 1909 * 1910 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35 1911 */ 1912 bool js::str_codePointAt(JSContext* cx, unsigned argc, Value* vp) { 1913 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "codePointAt"); 1914 CallArgs args = CallArgsFromVp(argc, vp); 1915 1916 // Steps 1-2. 1917 RootedString str(cx, 1918 ToStringForStringFunction(cx, "codePointAt", args.thisv())); 1919 if (!str) { 1920 return false; 1921 } 1922 1923 // Step 3. 1924 mozilla::Maybe<size_t> index{}; 1925 if (!ToStringIndex(cx, args.get(0), str->length(), &index)) { 1926 return false; 1927 } 1928 1929 // Steps 4-5. 1930 if (index.isNothing()) { 1931 args.rval().setUndefined(); 1932 return true; 1933 } 1934 MOZ_ASSERT(*index < str->length()); 1935 1936 // Step 6. 
1937 char32_t codePoint; 1938 if (!str->getCodePoint(cx, *index, &codePoint)) { 1939 return false; 1940 } 1941 1942 // Step 7. 1943 args.rval().setInt32(codePoint); 1944 return true; 1945 } 1946 1947 /** 1948 * 22.1.3.1 String.prototype.at ( index ) 1949 * 1950 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35 1951 */ 1952 static bool str_at(JSContext* cx, unsigned argc, Value* vp) { 1953 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "at"); 1954 CallArgs args = CallArgsFromVp(argc, vp); 1955 1956 // Steps 1-2. 1957 RootedString str(cx, ToStringForStringFunction(cx, "at", args.thisv())); 1958 if (!str) { 1959 return false; 1960 } 1961 1962 // Steps 3-6. 1963 mozilla::Maybe<size_t> index{}; 1964 if (!ToRelativeStringIndex(cx, args.get(0), str->length(), &index)) { 1965 return false; 1966 } 1967 1968 // Step 7. 1969 if (index.isNothing()) { 1970 args.rval().setUndefined(); 1971 return true; 1972 } 1973 MOZ_ASSERT(*index < str->length()); 1974 1975 // Step 8. 1976 auto* result = cx->staticStrings().getUnitStringForElement(cx, str, *index); 1977 if (!result) { 1978 return false; 1979 } 1980 args.rval().setString(result); 1981 return true; 1982 } 1983 1984 /* 1985 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. 1986 * The patlen argument must be positive and no greater than sBMHPatLenMax. 1987 * 1988 * Return the index of pat in text, or -1 if not found. 
1989 */ 1990 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */ 1991 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */ 1992 static const int sBMHBadPattern = 1993 -2; /* return value if pat is not ISO-Latin-1 */ 1994 1995 template <typename TextChar, typename PatChar> 1996 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen, 1997 const PatChar* pat, uint32_t patLen) { 1998 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax); 1999 2000 uint8_t skip[sBMHCharSetSize]; 2001 for (uint32_t i = 0; i < sBMHCharSetSize; i++) { 2002 skip[i] = uint8_t(patLen); 2003 } 2004 2005 uint32_t patLast = patLen - 1; 2006 for (uint32_t i = 0; i < patLast; i++) { 2007 char16_t c = pat[i]; 2008 if (c >= sBMHCharSetSize) { 2009 return sBMHBadPattern; 2010 } 2011 skip[c] = uint8_t(patLast - i); 2012 } 2013 2014 for (uint32_t k = patLast; k < textLen;) { 2015 for (uint32_t i = k, j = patLast;; i--, j--) { 2016 if (text[i] != pat[j]) { 2017 break; 2018 } 2019 if (j == 0) { 2020 return static_cast<int>(i); /* safe: max string size */ 2021 } 2022 } 2023 2024 char16_t c = text[k]; 2025 k += (c >= sBMHCharSetSize) ? 
patLen : skip[c]; 2026 } 2027 return -1; 2028 } 2029 2030 template <typename TextChar, typename PatChar> 2031 struct MemCmp { 2032 using Extent = uint32_t; 2033 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, 2034 uint32_t patLen) { 2035 return (patLen - 2) * sizeof(PatChar); 2036 } 2037 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, 2038 Extent extent) { 2039 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar)); 2040 return memcmp(p, t, extent) == 0; 2041 } 2042 }; 2043 2044 template <typename TextChar, typename PatChar> 2045 struct ManualCmp { 2046 using Extent = const PatChar*; 2047 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, 2048 uint32_t patLen) { 2049 return pat + patLen; 2050 } 2051 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, 2052 Extent extent) { 2053 for (; p != extent; ++p, ++t) { 2054 if (*p != *t) { 2055 return false; 2056 } 2057 } 2058 return true; 2059 } 2060 }; 2061 2062 template <class InnerMatch, typename TextChar, typename PatChar> 2063 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, 2064 uint32_t patlen) { 2065 MOZ_ASSERT(patlen > 1); 2066 2067 const typename InnerMatch::Extent extent = 2068 InnerMatch::computeExtent(pat, patlen); 2069 2070 uint32_t i = 0; 2071 uint32_t n = textlen - patlen + 1; 2072 2073 while (i < n) { 2074 const TextChar* pos; 2075 2076 // This is a bit awkward. Consider the case where we're searching "abcdef" 2077 // for "def". n will be 4, because we know in advance that the last place we 2078 // can *start* a successful search will be at 'd'. However, if we just use n 2079 // - i, then our first search will be looking through "abcd" for "de", 2080 // because our memchr2xN functions search for two characters at a time. So 2081 // we just have to compensate by adding 1. This will never exceed textlen 2082 // because we know patlen is at least two. 
2083 size_t searchLen = n - i + 1; 2084 if (sizeof(TextChar) == 1) { 2085 MOZ_ASSERT(pat[0] <= 0xff); 2086 pos = (TextChar*)SIMD::memchr2x8((char*)text + i, pat[0], pat[1], 2087 searchLen); 2088 } else { 2089 pos = (TextChar*)SIMD::memchr2x16((char16_t*)(text + i), char16_t(pat[0]), 2090 char16_t(pat[1]), searchLen); 2091 } 2092 2093 if (pos == nullptr) { 2094 return -1; 2095 } 2096 2097 i = static_cast<uint32_t>(pos - text); 2098 const uint32_t inlineLookaheadChars = 2; 2099 if (InnerMatch::match(pat + inlineLookaheadChars, 2100 text + i + inlineLookaheadChars, extent)) { 2101 return i; 2102 } 2103 2104 i += 1; 2105 } 2106 return -1; 2107 } 2108 2109 template <typename TextChar, typename PatChar> 2110 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen, 2111 const PatChar* pat, uint32_t patLen) { 2112 if (patLen == 0) { 2113 return 0; 2114 } 2115 if (textLen < patLen) { 2116 return -1; 2117 } 2118 2119 if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) { 2120 return -1; 2121 } 2122 2123 if (patLen == 1) { 2124 const TextChar* pos; 2125 if (sizeof(TextChar) == 1) { 2126 MOZ_ASSERT(pat[0] <= 0xff); 2127 pos = (TextChar*)SIMD::memchr8((char*)text, pat[0], textLen); 2128 } else { 2129 pos = 2130 (TextChar*)SIMD::memchr16((char16_t*)text, char16_t(pat[0]), textLen); 2131 } 2132 2133 if (pos == nullptr) { 2134 return -1; 2135 } 2136 2137 return pos - text; 2138 } 2139 2140 // We use a fast two-character-wide search in Matcher below, so we need to 2141 // validate that pat[1] isn't outside the latin1 range up front if the 2142 // sizes are different. 2143 if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[1] > 0xff) { 2144 return -1; 2145 } 2146 2147 /* 2148 * If the text or pattern string is short, BMH will be more expensive than 2149 * the basic linear scan due to initialization cost and a more complex loop 2150 * body. 
While the correct threshold is input-dependent, we can make a few 2151 * conservative observations: 2152 * - When |textLen| is "big enough", the initialization time will be 2153 * proportionally small, so the worst-case slowdown is minimized. 2154 * - When |patLen| is "too small", even the best case for BMH will be 2155 * slower than a simple scan for large |textLen| due to the more complex 2156 * loop body of BMH. 2157 * From this, the values for "big enough" and "too small" are determined 2158 * empirically. See bug 526348. 2159 */ 2160 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) { 2161 int index = BoyerMooreHorspool(text, textLen, pat, patLen); 2162 if (index != sBMHBadPattern) { 2163 return index; 2164 } 2165 } 2166 2167 /* 2168 * For big patterns with large potential overlap we want the SIMD-optimized 2169 * speed of memcmp. For small patterns, a simple loop is faster. We also can't 2170 * use memcmp if one of the strings is TwoByte and the other is Latin-1. 2171 */ 2172 return (patLen > 128 && std::is_same_v<TextChar, PatChar>) 2173 ? 
Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>( 2174 text, textLen, pat, patLen) 2175 : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>( 2176 text, textLen, pat, patLen); 2177 } 2178 2179 static int32_t StringMatch(const JSLinearString* text, 2180 const JSLinearString* pat, uint32_t start = 0) { 2181 MOZ_ASSERT(start <= text->length()); 2182 uint32_t textLen = text->length() - start; 2183 uint32_t patLen = pat->length(); 2184 2185 int match; 2186 AutoCheckCannotGC nogc; 2187 if (text->hasLatin1Chars()) { 2188 const Latin1Char* textChars = text->latin1Chars(nogc) + start; 2189 if (pat->hasLatin1Chars()) { 2190 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen); 2191 } else { 2192 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen); 2193 } 2194 } else { 2195 const char16_t* textChars = text->twoByteChars(nogc) + start; 2196 if (pat->hasLatin1Chars()) { 2197 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen); 2198 } else { 2199 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen); 2200 } 2201 } 2202 2203 return (match == -1) ? -1 : start + match; 2204 } 2205 2206 static const size_t sRopeMatchThresholdRatioLog2 = 4; 2207 2208 int js::StringFindPattern(const JSLinearString* text, const JSLinearString* pat, 2209 size_t start) { 2210 return StringMatch(text, pat, start); 2211 } 2212 2213 using LinearStringVector = Vector<JSLinearString*, 16, SystemAllocPolicy>; 2214 2215 template <typename TextChar, typename PatChar> 2216 static int RopeMatchImpl(const AutoCheckCannotGC& nogc, 2217 LinearStringVector& strings, const PatChar* pat, 2218 size_t patLen) { 2219 /* Absolute offset from the beginning of the logical text string. */ 2220 int pos = 0; 2221 2222 for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); 2223 ++outerp) { 2224 /* Try to find a match within 'outer'. 
*/ 2225 JSLinearString* outer = *outerp; 2226 const TextChar* chars = outer->chars<TextChar>(nogc); 2227 size_t len = outer->length(); 2228 int matchResult = StringMatch(chars, len, pat, patLen); 2229 if (matchResult != -1) { 2230 /* Matched! */ 2231 return pos + matchResult; 2232 } 2233 2234 /* Try to find a match starting in 'outer' and running into other nodes. */ 2235 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1); 2236 const TextChar* const textend = chars + len; 2237 const PatChar p0 = *pat; 2238 const PatChar* const p1 = pat + 1; 2239 const PatChar* const patend = pat + patLen; 2240 for (const TextChar* t = text; t != textend;) { 2241 if (*t++ != p0) { 2242 continue; 2243 } 2244 2245 JSLinearString** innerp = outerp; 2246 const TextChar* ttend = textend; 2247 const TextChar* tt = t; 2248 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) { 2249 while (tt == ttend) { 2250 if (++innerp == strings.end()) { 2251 return -1; 2252 } 2253 2254 JSLinearString* inner = *innerp; 2255 tt = inner->chars<TextChar>(nogc); 2256 ttend = tt + inner->length(); 2257 } 2258 if (*pp != *tt) { 2259 goto break_continue; 2260 } 2261 } 2262 2263 /* Matched! */ 2264 return pos + (t - chars) - 1; /* -1 because of *t++ above */ 2265 2266 break_continue:; 2267 } 2268 2269 pos += len; 2270 } 2271 2272 return -1; 2273 } 2274 2275 /* 2276 * RopeMatch takes the text to search and the pattern to search for in the text. 2277 * RopeMatch returns false on OOM and otherwise returns the match index through 2278 * the 'match' outparam (-1 for not found). 2279 */ 2280 static bool RopeMatch(JSContext* cx, JSRope* text, const JSLinearString* pat, 2281 int* match) { 2282 uint32_t patLen = pat->length(); 2283 if (patLen == 0) { 2284 *match = 0; 2285 return true; 2286 } 2287 if (text->length() < patLen) { 2288 *match = -1; 2289 return true; 2290 } 2291 2292 /* 2293 * List of leaf nodes in the rope. 
If we run out of memory when trying to 2294 * append to this list, we can still fall back to StringMatch, so use the 2295 * system allocator so we don't report OOM in that case. 2296 */ 2297 LinearStringVector strings; 2298 2299 /* 2300 * We don't want to do rope matching if there is a poor node-to-char ratio, 2301 * since this means spending a lot of time in the match loop below. We also 2302 * need to build the list of leaf nodes. Do both here: iterate over the 2303 * nodes so long as there are not too many. 2304 * 2305 * We also don't use rope matching if the rope contains both Latin-1 and 2306 * TwoByte nodes, to simplify the match algorithm. 2307 */ 2308 { 2309 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2; 2310 StringSegmentRange r(cx); 2311 if (!r.init(text)) { 2312 return false; 2313 } 2314 2315 bool textIsLatin1 = text->hasLatin1Chars(); 2316 while (!r.empty()) { 2317 if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 || 2318 !strings.append(r.front())) { 2319 JSLinearString* linear = text->ensureLinear(cx); 2320 if (!linear) { 2321 return false; 2322 } 2323 2324 *match = StringMatch(linear, pat); 2325 return true; 2326 } 2327 if (!r.popFront()) { 2328 return false; 2329 } 2330 } 2331 } 2332 2333 AutoCheckCannotGC nogc; 2334 if (text->hasLatin1Chars()) { 2335 if (pat->hasLatin1Chars()) { 2336 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), 2337 patLen); 2338 } else { 2339 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), 2340 patLen); 2341 } 2342 } else { 2343 if (pat->hasLatin1Chars()) { 2344 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), 2345 patLen); 2346 } else { 2347 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), 2348 patLen); 2349 } 2350 } 2351 2352 return true; 2353 } 2354 2355 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp( 2356 JSContext* cx, const CallArgs& args) { 2357 // Only call IsRegExp if 
the first argument is definitely an object, so we 2358 // don't pay the cost of an additional function call in the common case. 2359 if (args.length() == 0 || !args[0].isObject()) { 2360 return true; 2361 } 2362 2363 bool isRegExp; 2364 if (!IsRegExp(cx, args[0], &isRegExp)) { 2365 return false; 2366 } 2367 2368 if (isRegExp) { 2369 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 2370 JSMSG_INVALID_ARG_TYPE, "first", "", 2371 "Regular Expression"); 2372 return false; 2373 } 2374 return true; 2375 } 2376 2377 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b 2378 // 22.1.3.8 String.prototype.includes ( searchString [ , position ] ) 2379 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) { 2380 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "includes"); 2381 CallArgs args = CallArgsFromVp(argc, vp); 2382 2383 // Steps 1-2. 2384 RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv())); 2385 if (!str) { 2386 return false; 2387 } 2388 2389 // Steps 3-4. 2390 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) { 2391 return false; 2392 } 2393 2394 // Step 5. 2395 Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0)); 2396 if (!searchStr) { 2397 return false; 2398 } 2399 2400 // Steps 6-9. 2401 uint32_t start = 0; 2402 if (args.hasDefined(1)) { 2403 if (!ToClampedStringIndex(cx, args[1], str->length(), &start)) { 2404 return false; 2405 } 2406 } 2407 2408 // Steps 10-12. 
2409 JSLinearString* text = str->ensureLinear(cx); 2410 if (!text) { 2411 return false; 2412 } 2413 2414 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1); 2415 return true; 2416 } 2417 2418 bool js::StringIncludes(JSContext* cx, HandleString string, 2419 HandleString searchString, bool* result) { 2420 JSLinearString* text = string->ensureLinear(cx); 2421 if (!text) { 2422 return false; 2423 } 2424 2425 JSLinearString* searchStr = searchString->ensureLinear(cx); 2426 if (!searchStr) { 2427 return false; 2428 } 2429 2430 *result = StringMatch(text, searchStr, 0) != -1; 2431 return true; 2432 } 2433 2434 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b 2435 // 22.1.3.9 String.prototype.indexOf ( searchString [ , position ] ) 2436 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) { 2437 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "indexOf"); 2438 CallArgs args = CallArgsFromVp(argc, vp); 2439 2440 // Steps 1-2. 2441 RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv())); 2442 if (!str) { 2443 return false; 2444 } 2445 2446 // Step 3. 2447 Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0)); 2448 if (!searchStr) { 2449 return false; 2450 } 2451 2452 // Steps 4-7. 2453 uint32_t start = 0; 2454 if (args.hasDefined(1)) { 2455 if (!ToClampedStringIndex(cx, args[1], str->length(), &start)) { 2456 return false; 2457 } 2458 } 2459 2460 if (str == searchStr) { 2461 // AngularJS often invokes "false".indexOf("false"). This check should 2462 // be cheap enough to not hurt anything else. 2463 args.rval().setInt32(start == 0 ? 0 : -1); 2464 return true; 2465 } 2466 2467 // Steps 8-10. 
2468 JSLinearString* text = str->ensureLinear(cx); 2469 if (!text) { 2470 return false; 2471 } 2472 2473 args.rval().setInt32(StringMatch(text, searchStr, start)); 2474 return true; 2475 } 2476 2477 bool js::StringIndexOf(JSContext* cx, HandleString string, 2478 HandleString searchString, int32_t* result) { 2479 if (string == searchString) { 2480 *result = 0; 2481 return true; 2482 } 2483 2484 JSLinearString* text = string->ensureLinear(cx); 2485 if (!text) { 2486 return false; 2487 } 2488 2489 JSLinearString* searchStr = searchString->ensureLinear(cx); 2490 if (!searchStr) { 2491 return false; 2492 } 2493 2494 *result = StringMatch(text, searchStr, 0); 2495 return true; 2496 } 2497 2498 template <typename TextChar, typename PatChar> 2499 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen, 2500 const PatChar* pat, size_t patLen, 2501 size_t start) { 2502 MOZ_ASSERT(patLen > 0); 2503 MOZ_ASSERT(patLen <= textLen); 2504 MOZ_ASSERT(start <= textLen - patLen); 2505 2506 const PatChar p0 = *pat; 2507 const PatChar* patNext = pat + 1; 2508 const PatChar* patEnd = pat + patLen; 2509 2510 for (const TextChar* t = text + start; t >= text; --t) { 2511 if (*t == p0) { 2512 const TextChar* t1 = t + 1; 2513 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) { 2514 if (*t1 != *p1) { 2515 goto break_continue; 2516 } 2517 } 2518 2519 return static_cast<int32_t>(t - text); 2520 } 2521 break_continue:; 2522 } 2523 2524 return -1; 2525 } 2526 2527 static int32_t LastIndexOf(const JSLinearString* text, 2528 const JSLinearString* searchStr, size_t start) { 2529 AutoCheckCannotGC nogc; 2530 2531 size_t len = text->length(); 2532 size_t searchLen = searchStr->length(); 2533 2534 if (text->hasLatin1Chars()) { 2535 const Latin1Char* textChars = text->latin1Chars(nogc); 2536 if (searchStr->hasLatin1Chars()) { 2537 return LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc), 2538 searchLen, start); 2539 } 2540 return LastIndexOfImpl(textChars, len, 
searchStr->twoByteChars(nogc), 2541 searchLen, start); 2542 } 2543 2544 const char16_t* textChars = text->twoByteChars(nogc); 2545 if (searchStr->hasLatin1Chars()) { 2546 return LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc), 2547 searchLen, start); 2548 } 2549 return LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc), 2550 searchLen, start); 2551 } 2552 2553 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b 2554 // 22.1.3.11 String.prototype.lastIndexOf ( searchString [ , position ] ) 2555 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) { 2556 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "lastIndexOf"); 2557 CallArgs args = CallArgsFromVp(argc, vp); 2558 2559 // Steps 1-2. 2560 RootedString str(cx, 2561 ToStringForStringFunction(cx, "lastIndexOf", args.thisv())); 2562 if (!str) { 2563 return false; 2564 } 2565 2566 // Step 3. 2567 Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0)); 2568 if (!searchStr) { 2569 return false; 2570 } 2571 2572 // Step 7. 2573 size_t len = str->length(); 2574 2575 // Step 8. 2576 size_t searchLen = searchStr->length(); 2577 2578 // Steps 4-6 and 9. 
2579 int start = len - searchLen; // Start searching here 2580 if (args.hasDefined(1)) { 2581 if (args[1].isInt32()) { 2582 int i = args[1].toInt32(); 2583 if (i <= 0) { 2584 start = 0; 2585 } else if (i < start) { 2586 start = i; 2587 } 2588 } else { 2589 double d; 2590 if (!ToNumber(cx, args[1], &d)) { 2591 return false; 2592 } 2593 if (!std::isnan(d)) { 2594 d = JS::ToInteger(d); 2595 if (d <= 0) { 2596 start = 0; 2597 } else if (d < start) { 2598 start = int(d); 2599 } 2600 } 2601 } 2602 } 2603 2604 if (str == searchStr) { 2605 args.rval().setInt32(0); 2606 return true; 2607 } 2608 2609 if (searchLen > len) { 2610 args.rval().setInt32(-1); 2611 return true; 2612 } 2613 2614 if (searchLen == 0) { 2615 args.rval().setInt32(start); 2616 return true; 2617 } 2618 MOZ_ASSERT(0 <= start && size_t(start) < len); 2619 2620 JSLinearString* text = str->ensureLinear(cx); 2621 if (!text) { 2622 return false; 2623 } 2624 2625 // Step 10-12. 2626 args.rval().setInt32(LastIndexOf(text, searchStr, start)); 2627 return true; 2628 } 2629 2630 bool js::StringLastIndexOf(JSContext* cx, HandleString string, 2631 HandleString searchString, int32_t* result) { 2632 if (string == searchString) { 2633 *result = 0; 2634 return true; 2635 } 2636 2637 size_t len = string->length(); 2638 size_t searchLen = searchString->length(); 2639 2640 if (searchLen > len) { 2641 *result = -1; 2642 return true; 2643 } 2644 2645 MOZ_ASSERT(len >= searchLen); 2646 size_t start = len - searchLen; 2647 2648 if (searchLen == 0) { 2649 *result = start; 2650 return true; 2651 } 2652 MOZ_ASSERT(start < len); 2653 2654 JSLinearString* text = string->ensureLinear(cx); 2655 if (!text) { 2656 return false; 2657 } 2658 2659 JSLinearString* searchStr = searchString->ensureLinear(cx); 2660 if (!searchStr) { 2661 return false; 2662 } 2663 2664 *result = LastIndexOf(text, searchStr, start); 2665 return true; 2666 } 2667 2668 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b 2669 // 22.1.3.24 
String.prototype.startsWith ( searchString [ , position ] ) 2670 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) { 2671 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "startsWith"); 2672 CallArgs args = CallArgsFromVp(argc, vp); 2673 2674 // Steps 1-2. 2675 RootedString str(cx, 2676 ToStringForStringFunction(cx, "startsWith", args.thisv())); 2677 if (!str) { 2678 return false; 2679 } 2680 2681 // Steps 3-4. 2682 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) { 2683 return false; 2684 } 2685 2686 // Step 5. 2687 Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0)); 2688 if (!searchStr) { 2689 return false; 2690 } 2691 2692 // Step 6. 2693 uint32_t textLen = str->length(); 2694 2695 // Steps 7-8. 2696 uint32_t start = 0; 2697 if (args.hasDefined(1)) { 2698 if (!ToClampedStringIndex(cx, args[1], textLen, &start)) { 2699 return false; 2700 } 2701 } 2702 2703 // Step 9. 2704 uint32_t searchLen = searchStr->length(); 2705 2706 // Step 12. 2707 if (searchLen + start < searchLen || searchLen + start > textLen) { 2708 args.rval().setBoolean(false); 2709 return true; 2710 } 2711 2712 // Steps 10-11 and 13-15. 
2713 JSLinearString* text = str->ensureLinear(cx); 2714 if (!text) { 2715 return false; 2716 } 2717 2718 args.rval().setBoolean(HasSubstringAt(text, searchStr, start)); 2719 return true; 2720 } 2721 2722 bool js::StringStartsWith(JSContext* cx, HandleString string, 2723 HandleString searchString, bool* result) { 2724 if (searchString->length() > string->length()) { 2725 *result = false; 2726 return true; 2727 } 2728 2729 JSLinearString* str = string->ensureLinear(cx); 2730 if (!str) { 2731 return false; 2732 } 2733 2734 JSLinearString* searchStr = searchString->ensureLinear(cx); 2735 if (!searchStr) { 2736 return false; 2737 } 2738 2739 *result = HasSubstringAt(str, searchStr, 0); 2740 return true; 2741 } 2742 2743 // ES2026 draft rev a562082b031d89d00ee667181ce8a6158656bd4b 2744 // 22.1.3.7 String.prototype.endsWith ( searchString [ , endPosition ] ) 2745 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) { 2746 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "endsWith"); 2747 CallArgs args = CallArgsFromVp(argc, vp); 2748 2749 // Steps 1-2. 2750 RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv())); 2751 if (!str) { 2752 return false; 2753 } 2754 2755 // Steps 3-4. 2756 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) { 2757 return false; 2758 } 2759 2760 // Step 5. 2761 Rooted<JSLinearString*> searchStr(cx, ArgToLinearString(cx, args, 0)); 2762 if (!searchStr) { 2763 return false; 2764 } 2765 2766 // Step 6. 2767 uint32_t textLen = str->length(); 2768 2769 // Steps 7-8. 2770 uint32_t end = textLen; 2771 if (args.hasDefined(1)) { 2772 if (!ToClampedStringIndex(cx, args[1], textLen, &end)) { 2773 return false; 2774 } 2775 } 2776 2777 // Step 9. 2778 uint32_t searchLen = searchStr->length(); 2779 2780 // Step 12 (reordered). 2781 if (searchLen > end) { 2782 args.rval().setBoolean(false); 2783 return true; 2784 } 2785 2786 // Step 11. 2787 uint32_t start = end - searchLen; 2788 2789 // Steps 10 and 13-15. 
2790 JSLinearString* text = str->ensureLinear(cx); 2791 if (!text) { 2792 return false; 2793 } 2794 2795 args.rval().setBoolean(HasSubstringAt(text, searchStr, start)); 2796 return true; 2797 } 2798 2799 bool js::StringEndsWith(JSContext* cx, HandleString string, 2800 HandleString searchString, bool* result) { 2801 if (searchString->length() > string->length()) { 2802 *result = false; 2803 return true; 2804 } 2805 2806 JSLinearString* str = string->ensureLinear(cx); 2807 if (!str) { 2808 return false; 2809 } 2810 2811 JSLinearString* searchStr = searchString->ensureLinear(cx); 2812 if (!searchStr) { 2813 return false; 2814 } 2815 2816 uint32_t start = str->length() - searchStr->length(); 2817 2818 *result = HasSubstringAt(str, searchStr, start); 2819 return true; 2820 } 2821 2822 template <typename CharT> 2823 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd, 2824 size_t length, size_t* pBegin, size_t* pEnd) { 2825 size_t begin = 0, end = length; 2826 2827 if (trimStart) { 2828 while (begin < length && unicode::IsSpace(chars[begin])) { 2829 ++begin; 2830 } 2831 } 2832 2833 if (trimEnd) { 2834 while (end > begin && unicode::IsSpace(chars[end - 1])) { 2835 --end; 2836 } 2837 } 2838 2839 *pBegin = begin; 2840 *pEnd = end; 2841 } 2842 2843 static JSLinearString* TrimString(JSContext* cx, JSString* str, bool trimStart, 2844 bool trimEnd) { 2845 JSLinearString* linear = str->ensureLinear(cx); 2846 if (!linear) { 2847 return nullptr; 2848 } 2849 2850 size_t length = linear->length(); 2851 size_t begin, end; 2852 if (linear->hasLatin1Chars()) { 2853 AutoCheckCannotGC nogc; 2854 TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin, 2855 &end); 2856 } else { 2857 AutoCheckCannotGC nogc; 2858 TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin, 2859 &end); 2860 } 2861 2862 return NewDependentString(cx, linear, begin, end - begin); 2863 } 2864 2865 JSString* js::StringTrim(JSContext* cx, HandleString string) { 
2866 return TrimString(cx, string, true, true); 2867 } 2868 2869 JSString* js::StringTrimStart(JSContext* cx, HandleString string) { 2870 return TrimString(cx, string, true, false); 2871 } 2872 2873 JSString* js::StringTrimEnd(JSContext* cx, HandleString string) { 2874 return TrimString(cx, string, false, true); 2875 } 2876 2877 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName, 2878 bool trimStart, bool trimEnd) { 2879 JSString* str = ToStringForStringFunction(cx, funName, args.thisv()); 2880 if (!str) { 2881 return false; 2882 } 2883 2884 JSLinearString* result = TrimString(cx, str, trimStart, trimEnd); 2885 if (!result) { 2886 return false; 2887 } 2888 2889 args.rval().setString(result); 2890 return true; 2891 } 2892 2893 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) { 2894 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trim"); 2895 CallArgs args = CallArgsFromVp(argc, vp); 2896 return TrimString(cx, args, "trim", true, true); 2897 } 2898 2899 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) { 2900 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trimStart"); 2901 CallArgs args = CallArgsFromVp(argc, vp); 2902 return TrimString(cx, args, "trimStart", true, false); 2903 } 2904 2905 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) { 2906 AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "trimEnd"); 2907 CallArgs args = CallArgsFromVp(argc, vp); 2908 return TrimString(cx, args, "trimEnd", false, true); 2909 } 2910 2911 // Utility for building a rope (lazy concatenation) of strings. 
2912 class RopeBuilder { 2913 JSContext* cx; 2914 RootedString res; 2915 2916 RopeBuilder(const RopeBuilder& other) = delete; 2917 void operator=(const RopeBuilder& other) = delete; 2918 2919 public: 2920 explicit RopeBuilder(JSContext* cx) 2921 : cx(cx), res(cx, cx->runtime()->emptyString) {} 2922 2923 inline bool append(HandleString str) { 2924 res = ConcatStrings<CanGC>(cx, res, str); 2925 return !!res; 2926 } 2927 2928 inline JSString* result() { return res; } 2929 }; 2930 2931 namespace { 2932 2933 template <typename CharT> 2934 static uint32_t FindDollarIndex(const CharT* chars, size_t length) { 2935 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) { 2936 uint32_t dollarIndex = p - chars; 2937 MOZ_ASSERT(dollarIndex < length); 2938 return dollarIndex; 2939 } 2940 return UINT32_MAX; 2941 } 2942 2943 } /* anonymous namespace */ 2944 2945 /* 2946 * Constructs a result string that looks like: 2947 * 2948 * newstring = string[:matchStart] + repstr + string[matchEnd:] 2949 */ 2950 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr, 2951 Handle<JSLinearString*> repstr, 2952 size_t matchStart, size_t patternLength) { 2953 size_t matchEnd = matchStart + patternLength; 2954 2955 RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart)); 2956 if (!resultStr) { 2957 return nullptr; 2958 } 2959 2960 resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr); 2961 if (!resultStr) { 2962 return nullptr; 2963 } 2964 2965 MOZ_ASSERT(textstr->length() >= matchEnd); 2966 RootedString rest(cx, NewDependentString(cx, textstr, matchEnd, 2967 textstr->length() - matchEnd)); 2968 if (!rest) { 2969 return nullptr; 2970 } 2971 2972 return ConcatStrings<CanGC>(cx, resultStr, rest); 2973 } 2974 2975 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr, 2976 Handle<JSLinearString*> repstr, 2977 size_t match, size_t patternLength) { 2978 MOZ_ASSERT(textstr->isRope()); 2979 2980 size_t matchEnd = match + 
patternLength; 2981 2982 /* 2983 * If we are replacing over a rope, avoid flattening it by iterating 2984 * through it, building a new rope. 2985 */ 2986 StringSegmentRange r(cx); 2987 if (!r.init(textstr)) { 2988 return nullptr; 2989 } 2990 2991 RopeBuilder builder(cx); 2992 2993 /* 2994 * Special case when the pattern string is '', which matches to the 2995 * head of the string and doesn't overlap with any component of the rope. 2996 */ 2997 if (patternLength == 0) { 2998 MOZ_ASSERT(match == 0); 2999 if (!builder.append(repstr)) { 3000 return nullptr; 3001 } 3002 } 3003 3004 size_t pos = 0; 3005 while (!r.empty()) { 3006 RootedString str(cx, r.front()); 3007 size_t len = str->length(); 3008 size_t strEnd = pos + len; 3009 if (pos < matchEnd && strEnd > match) { 3010 /* 3011 * We need to special-case any part of the rope that overlaps 3012 * with the replacement string. 3013 */ 3014 if (match >= pos) { 3015 /* 3016 * If this part of the rope overlaps with the left side of 3017 * the pattern, then it must be the only one to overlap with 3018 * the first character in the pattern, so we include the 3019 * replacement string here. 3020 */ 3021 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos)); 3022 if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) { 3023 return nullptr; 3024 } 3025 } 3026 3027 /* 3028 * If str runs off the end of the matched string, append the 3029 * last part of str. 
3030 */ 3031 if (strEnd > matchEnd) { 3032 RootedString rightSide( 3033 cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd)); 3034 if (!rightSide || !builder.append(rightSide)) { 3035 return nullptr; 3036 } 3037 } 3038 } else { 3039 if (!builder.append(str)) { 3040 return nullptr; 3041 } 3042 } 3043 pos += str->length(); 3044 if (!r.popFront()) { 3045 return nullptr; 3046 } 3047 } 3048 3049 return builder.result(); 3050 } 3051 3052 template <typename CharT> 3053 static bool AppendDollarReplacement(StringBuilder& newReplaceChars, 3054 size_t firstDollarIndex, size_t matchStart, 3055 size_t matchLimit, 3056 const JSLinearString* text, 3057 const CharT* repChars, size_t repLength) { 3058 MOZ_ASSERT(firstDollarIndex < repLength); 3059 MOZ_ASSERT(matchStart <= matchLimit); 3060 MOZ_ASSERT(matchLimit <= text->length()); 3061 3062 // Move the pre-dollar chunk in bulk. 3063 if (!newReplaceChars.append(repChars, firstDollarIndex)) { 3064 return false; 3065 } 3066 3067 // Move the rest char-by-char, interpreting dollars as we encounter them. 3068 const CharT* repLimit = repChars + repLength; 3069 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) { 3070 if (*it != '$' || it == repLimit - 1) { 3071 if (!newReplaceChars.append(*it)) { 3072 return false; 3073 } 3074 continue; 3075 } 3076 3077 switch (*(it + 1)) { 3078 case '$': 3079 // Eat one of the dollars. 
3080 if (!newReplaceChars.append(*it)) { 3081 return false; 3082 } 3083 break; 3084 case '&': 3085 if (!newReplaceChars.appendSubstring(text, matchStart, 3086 matchLimit - matchStart)) { 3087 return false; 3088 } 3089 break; 3090 case '`': 3091 if (!newReplaceChars.appendSubstring(text, 0, matchStart)) { 3092 return false; 3093 } 3094 break; 3095 case '\'': 3096 if (!newReplaceChars.appendSubstring(text, matchLimit, 3097 text->length() - matchLimit)) { 3098 return false; 3099 } 3100 break; 3101 default: 3102 // The dollar we saw was not special (no matter what its mother told 3103 // it). 3104 if (!newReplaceChars.append(*it)) { 3105 return false; 3106 } 3107 continue; 3108 } 3109 ++it; // We always eat an extra char in the above switch. 3110 } 3111 3112 return true; 3113 } 3114 3115 /* 3116 * Perform a linear-scan dollar substitution on the replacement text. 3117 */ 3118 static JSLinearString* InterpretDollarReplacement( 3119 JSContext* cx, HandleString textstrArg, Handle<JSLinearString*> repstr, 3120 uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) { 3121 Rooted<JSLinearString*> textstr(cx, textstrArg->ensureLinear(cx)); 3122 if (!textstr) { 3123 return nullptr; 3124 } 3125 3126 size_t matchLimit = matchStart + patternLength; 3127 3128 /* 3129 * Most probably: 3130 * 3131 * len(newstr) >= len(orig) - len(match) + len(replacement) 3132 * 3133 * Note that dollar vars _could_ make the resulting text smaller than this. 
3134 */ 3135 JSStringBuilder newReplaceChars(cx); 3136 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) { 3137 return nullptr; 3138 } 3139 3140 if (!newReplaceChars.reserve(textstr->length() - patternLength + 3141 repstr->length())) { 3142 return nullptr; 3143 } 3144 3145 bool res; 3146 if (repstr->hasLatin1Chars()) { 3147 AutoCheckCannotGC nogc; 3148 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart, 3149 matchLimit, textstr, 3150 repstr->latin1Chars(nogc), repstr->length()); 3151 } else { 3152 AutoCheckCannotGC nogc; 3153 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart, 3154 matchLimit, textstr, 3155 repstr->twoByteChars(nogc), repstr->length()); 3156 } 3157 if (!res) { 3158 return nullptr; 3159 } 3160 3161 return newReplaceChars.finishString(); 3162 } 3163 3164 template <typename StrChar, typename RepChar> 3165 static bool StrFlatReplaceGlobal(JSContext* cx, const JSLinearString* str, 3166 const JSLinearString* pat, 3167 const JSLinearString* rep, StringBuilder& sb) { 3168 MOZ_ASSERT(str->length() > 0); 3169 3170 AutoCheckCannotGC nogc; 3171 const StrChar* strChars = str->chars<StrChar>(nogc); 3172 const RepChar* repChars = rep->chars<RepChar>(nogc); 3173 3174 // The pattern is empty, so we interleave the replacement string in-between 3175 // each character. 
3176 if (!pat->length()) { 3177 CheckedInt<uint32_t> strLength(str->length()); 3178 CheckedInt<uint32_t> repLength(rep->length()); 3179 CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength; 3180 if (!length.isValid()) { 3181 ReportAllocationOverflow(cx); 3182 return false; 3183 } 3184 3185 if (!sb.reserve(length.value())) { 3186 return false; 3187 } 3188 3189 for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) { 3190 sb.infallibleAppend(*strChars); 3191 sb.infallibleAppend(repChars, rep->length()); 3192 } 3193 sb.infallibleAppend(*strChars); 3194 return true; 3195 } 3196 3197 // If it's true, we are sure that the result's length is, at least, the same 3198 // length as |str->length()|. 3199 if (rep->length() >= pat->length()) { 3200 if (!sb.reserve(str->length())) { 3201 return false; 3202 } 3203 } 3204 3205 uint32_t start = 0; 3206 for (;;) { 3207 int match = StringMatch(str, pat, start); 3208 if (match < 0) { 3209 break; 3210 } 3211 if (!sb.append(strChars + start, match - start)) { 3212 return false; 3213 } 3214 if (!sb.append(repChars, rep->length())) { 3215 return false; 3216 } 3217 start = match + pat->length(); 3218 } 3219 3220 if (!sb.append(strChars + start, str->length() - start)) { 3221 return false; 3222 } 3223 3224 return true; 3225 } 3226 3227 // This is identical to "str.split(pattern).join(replacement)" except that we 3228 // do some deforestation optimization in Ion. 
3229 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string, 3230 HandleString pattern, 3231 HandleString replacement) { 3232 MOZ_ASSERT(string); 3233 MOZ_ASSERT(pattern); 3234 MOZ_ASSERT(replacement); 3235 3236 if (!string->length()) { 3237 return string; 3238 } 3239 3240 Rooted<JSLinearString*> linearRepl(cx, replacement->ensureLinear(cx)); 3241 if (!linearRepl) { 3242 return nullptr; 3243 } 3244 3245 Rooted<JSLinearString*> linearPat(cx, pattern->ensureLinear(cx)); 3246 if (!linearPat) { 3247 return nullptr; 3248 } 3249 3250 Rooted<JSLinearString*> linearStr(cx, string->ensureLinear(cx)); 3251 if (!linearStr) { 3252 return nullptr; 3253 } 3254 3255 JSStringBuilder sb(cx); 3256 if (linearStr->hasTwoByteChars()) { 3257 if (!sb.ensureTwoByteChars()) { 3258 return nullptr; 3259 } 3260 if (linearRepl->hasTwoByteChars()) { 3261 if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat, 3262 linearRepl, sb)) { 3263 return nullptr; 3264 } 3265 } else { 3266 if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat, 3267 linearRepl, sb)) { 3268 return nullptr; 3269 } 3270 } 3271 } else { 3272 if (linearRepl->hasTwoByteChars()) { 3273 if (!sb.ensureTwoByteChars()) { 3274 return nullptr; 3275 } 3276 if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat, 3277 linearRepl, sb)) { 3278 return nullptr; 3279 } 3280 } else { 3281 if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>( 3282 cx, linearStr, linearPat, linearRepl, sb)) { 3283 return nullptr; 3284 } 3285 } 3286 } 3287 3288 return sb.finishString(); 3289 } 3290 3291 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string, 3292 HandleString pattern, 3293 HandleString replacement) { 3294 Rooted<JSLinearString*> pat(cx, pattern->ensureLinear(cx)); 3295 if (!pat) { 3296 return nullptr; 3297 } 3298 3299 /* 3300 * |string| could be a rope, so we want to avoid flattening it for as 3301 * long as possible. 
3302 */ 3303 int32_t match; 3304 if (string->isRope()) { 3305 if (!RopeMatch(cx, &string->asRope(), pat, &match)) { 3306 return nullptr; 3307 } 3308 } else { 3309 match = StringMatch(&string->asLinear(), pat, 0); 3310 } 3311 3312 if (match < 0) { 3313 return string; 3314 } 3315 3316 Rooted<JSLinearString*> repl(cx, replacement->ensureLinear(cx)); 3317 if (!repl) { 3318 return nullptr; 3319 } 3320 uint32_t dollarIndex; 3321 { 3322 AutoCheckCannotGC nogc; 3323 dollarIndex = 3324 repl->hasLatin1Chars() 3325 ? FindDollarIndex(repl->latin1Chars(nogc), repl->length()) 3326 : FindDollarIndex(repl->twoByteChars(nogc), repl->length()); 3327 } 3328 3329 size_t patternLength = pat->length(); 3330 3331 if (dollarIndex != UINT32_MAX) { 3332 repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match, 3333 patternLength); 3334 if (!repl) { 3335 return nullptr; 3336 } 3337 } else if (string->isRope()) { 3338 return BuildFlatRopeReplacement(cx, string, repl, match, patternLength); 3339 } 3340 return BuildFlatReplacement(cx, string, repl, match, patternLength); 3341 } 3342 3343 template <typename StrChar, typename RepChar> 3344 static bool ReplaceAllInternal(const AutoCheckCannotGC& nogc, 3345 const JSLinearString* string, 3346 const JSLinearString* searchString, 3347 const JSLinearString* replaceString, 3348 const int32_t startPosition, 3349 JSStringBuilder& result) { 3350 // Step 7. 3351 const size_t stringLength = string->length(); 3352 const size_t searchLength = searchString->length(); 3353 const size_t replaceLength = replaceString->length(); 3354 3355 MOZ_ASSERT(stringLength > 0); 3356 MOZ_ASSERT(searchLength > 0); 3357 MOZ_ASSERT(stringLength >= searchLength); 3358 3359 // Step 12. 
3360 uint32_t endOfLastMatch = 0; 3361 3362 const StrChar* strChars = string->chars<StrChar>(nogc); 3363 const RepChar* repChars = replaceString->chars<RepChar>(nogc); 3364 3365 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength); 3366 3367 // If it's true, we are sure that the result's length is, at least, the same 3368 // length as |str->length()|. 3369 if (replaceLength >= searchLength) { 3370 if (!result.reserve(stringLength)) { 3371 return false; 3372 } 3373 } 3374 3375 int32_t position = startPosition; 3376 do { 3377 // Step 14.c. 3378 // Append the substring before the current match. 3379 if (!result.append(strChars + endOfLastMatch, position - endOfLastMatch)) { 3380 return false; 3381 } 3382 3383 // Steps 14.a-b and 14.d. 3384 // Append the replacement. 3385 if (dollarIndex != UINT32_MAX) { 3386 size_t matchLimit = position + searchLength; 3387 if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit, 3388 string, repChars, replaceLength)) { 3389 return false; 3390 } 3391 } else { 3392 if (!result.append(repChars, replaceLength)) { 3393 return false; 3394 } 3395 } 3396 3397 // Step 14.e. 3398 endOfLastMatch = position + searchLength; 3399 3400 // Step 11. 3401 // Find the next match. 3402 position = StringMatch(string, searchString, endOfLastMatch); 3403 } while (position >= 0); 3404 3405 // Step 15. 3406 // Append the substring after the last match. 3407 return result.append(strChars + endOfLastMatch, 3408 stringLength - endOfLastMatch); 3409 } 3410 3411 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall 3412 // Steps 7-16 when functionalReplace is false and searchString is not empty. 3413 // 3414 // The steps are quite different, for performance. Loops in steps 11 and 14 3415 // are fused. GetSubstitution is optimized away when possible. 
3416 template <typename StrChar, typename RepChar> 3417 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string, 3418 const JSLinearString* searchString, 3419 const JSLinearString* replaceString) { 3420 // Step 7 moved into ReplaceAll_internal. 3421 3422 // Step 8 (advanceBy is equal to searchLength when searchLength > 0). 3423 3424 // Step 9 (not needed in this implementation). 3425 3426 // Step 10. 3427 // Find the first match. 3428 int32_t position = StringMatch(string, searchString, 0); 3429 3430 // Nothing to replace, so return early. 3431 if (position < 0) { 3432 return string; 3433 } 3434 3435 // Steps 11, 12 moved into ReplaceAll_internal. 3436 3437 // Step 13. 3438 JSStringBuilder result(cx); 3439 if constexpr (std::is_same_v<StrChar, char16_t> || 3440 std::is_same_v<RepChar, char16_t>) { 3441 if (!result.ensureTwoByteChars()) { 3442 return nullptr; 3443 } 3444 } 3445 3446 bool internalFailure = false; 3447 { 3448 AutoCheckCannotGC nogc; 3449 internalFailure = !ReplaceAllInternal<StrChar, RepChar>( 3450 nogc, string, searchString, replaceString, position, result); 3451 } 3452 if (internalFailure) { 3453 return nullptr; 3454 } 3455 3456 // Step 16. 3457 return result.finishString(); 3458 } 3459 3460 template <typename StrChar, typename RepChar> 3461 static bool ReplaceAllInterleaveInternal(const AutoCheckCannotGC& nogc, 3462 JSContext* cx, 3463 const JSLinearString* string, 3464 const JSLinearString* replaceString, 3465 JSStringBuilder& result) { 3466 // Step 7. 
3467 const size_t stringLength = string->length(); 3468 const size_t replaceLength = replaceString->length(); 3469 3470 const StrChar* strChars = string->chars<StrChar>(nogc); 3471 const RepChar* repChars = replaceString->chars<RepChar>(nogc); 3472 3473 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength); 3474 3475 if (dollarIndex != UINT32_MAX) { 3476 if (!result.reserve(stringLength)) { 3477 return false; 3478 } 3479 } else { 3480 // Compute the exact result length when no substitutions take place. 3481 CheckedInt<uint32_t> strLength(stringLength); 3482 CheckedInt<uint32_t> repLength(replaceLength); 3483 CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength; 3484 if (!length.isValid()) { 3485 ReportAllocationOverflow(cx); 3486 return false; 3487 } 3488 3489 if (!result.reserve(length.value())) { 3490 return false; 3491 } 3492 } 3493 3494 auto appendReplacement = [&](size_t match) { 3495 if (dollarIndex != UINT32_MAX) { 3496 return AppendDollarReplacement(result, dollarIndex, match, match, string, 3497 repChars, replaceLength); 3498 } 3499 return result.append(repChars, replaceLength); 3500 }; 3501 3502 for (size_t index = 0; index < stringLength; index++) { 3503 // Steps 11, 14.a-b and 14.d. 3504 // The empty string matches before each character. 3505 if (!appendReplacement(index)) { 3506 return false; 3507 } 3508 3509 // Step 14.c. 3510 if (!result.append(strChars[index])) { 3511 return false; 3512 } 3513 } 3514 3515 // Steps 11, 14.a-b and 14.d. 3516 // The empty string also matches at the end of the string. 3517 return appendReplacement(stringLength); 3518 3519 // Step 15 (not applicable when searchString is the empty string). 3520 } 3521 3522 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall 3523 // Steps 7-16 when functionalReplace is false and searchString is the empty 3524 // string. 3525 // 3526 // The steps are quite different, for performance. Loops in steps 11 and 14 3527 // are fused. 
GetSubstitution is optimized away when possible. 3528 template <typename StrChar, typename RepChar> 3529 static JSString* ReplaceAllInterleave(JSContext* cx, 3530 const JSLinearString* string, 3531 const JSLinearString* replaceString) { 3532 // Step 7 moved into ReplaceAllInterleavedInternal. 3533 3534 // Step 8 (advanceBy is 1 when searchString is the empty string). 3535 3536 // Steps 9-12 (trivial when searchString is the empty string). 3537 3538 // Step 13. 3539 JSStringBuilder result(cx); 3540 if constexpr (std::is_same_v<StrChar, char16_t> || 3541 std::is_same_v<RepChar, char16_t>) { 3542 if (!result.ensureTwoByteChars()) { 3543 return nullptr; 3544 } 3545 } 3546 3547 bool internalFailure = false; 3548 { 3549 AutoCheckCannotGC nogc; 3550 internalFailure = !ReplaceAllInterleaveInternal<StrChar, RepChar>( 3551 nogc, cx, string, replaceString, result); 3552 } 3553 if (internalFailure) { 3554 return nullptr; 3555 } 3556 3557 // Step 16. 3558 return result.finishString(); 3559 } 3560 3561 // String.prototype.replaceAll (Stage 3 proposal) 3562 // https://tc39.es/proposal-string-replaceall/ 3563 // 3564 // String.prototype.replaceAll ( searchValue, replaceValue ) 3565 // 3566 // Steps 7-16 when functionalReplace is false. 3567 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string, 3568 HandleString searchString, 3569 HandleString replaceString) { 3570 const size_t stringLength = string->length(); 3571 const size_t searchLength = searchString->length(); 3572 3573 // Directly return when we're guaranteed to find no match. 
3574 if (searchLength > stringLength) { 3575 return string; 3576 } 3577 3578 Rooted<JSLinearString*> str(cx, string->ensureLinear(cx)); 3579 if (!str) { 3580 return nullptr; 3581 } 3582 3583 Rooted<JSLinearString*> repl(cx, replaceString->ensureLinear(cx)); 3584 if (!repl) { 3585 return nullptr; 3586 } 3587 3588 Rooted<JSLinearString*> search(cx, searchString->ensureLinear(cx)); 3589 if (!search) { 3590 return nullptr; 3591 } 3592 3593 // The pattern is empty, so we interleave the replacement string in-between 3594 // each character. 3595 if (searchLength == 0) { 3596 if (str->hasTwoByteChars()) { 3597 if (repl->hasTwoByteChars()) { 3598 return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl); 3599 } 3600 return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl); 3601 } 3602 if (repl->hasTwoByteChars()) { 3603 return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl); 3604 } 3605 return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl); 3606 } 3607 3608 MOZ_ASSERT(stringLength > 0); 3609 3610 if (str->hasTwoByteChars()) { 3611 if (repl->hasTwoByteChars()) { 3612 return ReplaceAll<char16_t, char16_t>(cx, str, search, repl); 3613 } 3614 return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl); 3615 } 3616 if (repl->hasTwoByteChars()) { 3617 return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl); 3618 } 3619 return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl); 3620 } 3621 3622 static ArrayObject* SingleElementStringArray(JSContext* cx, 3623 Handle<JSLinearString*> str) { 3624 ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1); 3625 if (!array) { 3626 return nullptr; 3627 } 3628 array->setDenseInitializedLength(1); 3629 array->initDenseElement(0, StringValue(str)); 3630 return array; 3631 } 3632 3633 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18. 
3634 static ArrayObject* SplitHelper(JSContext* cx, Handle<JSLinearString*> str, 3635 uint32_t limit, Handle<JSLinearString*> sep) { 3636 size_t strLength = str->length(); 3637 size_t sepLength = sep->length(); 3638 MOZ_ASSERT(sepLength != 0); 3639 3640 // Step 12. 3641 if (strLength == 0) { 3642 // Step 12.a. 3643 int match = StringMatch(str, sep, 0); 3644 3645 // Step 12.b. 3646 if (match != -1) { 3647 return NewDenseEmptyArray(cx); 3648 } 3649 3650 // Steps 12.c-e. 3651 return SingleElementStringArray(cx, str); 3652 } 3653 3654 // Step 3 (reordered). 3655 Rooted<ArrayObject*> substrings(cx, NewDenseEmptyArray(cx)); 3656 if (!substrings) { 3657 return nullptr; 3658 } 3659 3660 // Switch to allocating in the tenured heap if we fill the nursery. 3661 AutoSelectGCHeap gcHeap(cx); 3662 3663 // Step 8 (reordered). 3664 size_t lastEndIndex = 0; 3665 3666 // Step 13. 3667 size_t index = 0; 3668 3669 // Step 14. 3670 while (index != strLength) { 3671 // Step 14.a. 3672 int match = StringMatch(str, sep, index); 3673 3674 // Step 14.b. 3675 // 3676 // Our match algorithm differs from the spec in that it returns the 3677 // next index at which a match happens. If no match happens we're 3678 // done. 3679 // 3680 // But what if the match is at the end of the string (and the string is 3681 // not empty)? Per 14.c.i this shouldn't be a match, so we have to 3682 // specially exclude it. Thus this case should hold: 3683 // 3684 // var a = "abc".split(/\b/); 3685 // assertEq(a.length, 1); 3686 // assertEq(a[0], "abc"); 3687 if (match == -1) { 3688 break; 3689 } 3690 3691 // Step 14.c. 3692 size_t endIndex = match + sepLength; 3693 3694 // Step 14.c.i. 3695 if (endIndex == lastEndIndex) { 3696 index++; 3697 continue; 3698 } 3699 3700 // Step 14.c.ii. 3701 MOZ_ASSERT(lastEndIndex < endIndex); 3702 MOZ_ASSERT(sepLength <= strLength); 3703 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex); 3704 3705 // Step 14.c.ii.1. 
3706 size_t subLength = size_t(endIndex - sepLength - lastEndIndex); 3707 JSString* sub = 3708 NewDependentString(cx, str, lastEndIndex, subLength, gcHeap); 3709 3710 // Steps 14.c.ii.2-4. 3711 if (!sub || !NewbornArrayPush(cx, substrings, StringValue(sub))) { 3712 return nullptr; 3713 } 3714 3715 // Step 14.c.ii.5. 3716 if (substrings->length() == limit) { 3717 return substrings; 3718 } 3719 3720 // Step 14.c.ii.6. 3721 index = endIndex; 3722 3723 // Step 14.c.ii.7. 3724 lastEndIndex = index; 3725 } 3726 3727 // Step 15. 3728 size_t subLength = strLength - lastEndIndex; 3729 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength, gcHeap); 3730 3731 // Steps 16-17. 3732 if (!sub || !NewbornArrayPush(cx, substrings, StringValue(sub))) { 3733 return nullptr; 3734 } 3735 3736 // Step 18. 3737 return substrings; 3738 } 3739 3740 // Fast-path for splitting a string into a character array via split(""). 3741 static ArrayObject* CharSplitHelper(JSContext* cx, Handle<JSLinearString*> str, 3742 uint32_t limit) { 3743 size_t strLength = str->length(); 3744 if (strLength == 0) { 3745 return NewDenseEmptyArray(cx); 3746 } 3747 3748 js::StaticStrings& staticStrings = cx->staticStrings(); 3749 uint32_t resultlen = (limit < strLength ? 
limit : strLength); 3750 MOZ_ASSERT(limit > 0 && resultlen > 0, 3751 "Neither limit nor strLength is zero, so resultlen is greater " 3752 "than zero."); 3753 3754 Rooted<ArrayObject*> splits(cx, NewDenseFullyAllocatedArray(cx, resultlen)); 3755 if (!splits) { 3756 return nullptr; 3757 } 3758 3759 if (str->hasLatin1Chars()) { 3760 splits->setDenseInitializedLength(resultlen); 3761 3762 JS::AutoCheckCannotGC nogc; 3763 const Latin1Char* latin1Chars = str->latin1Chars(nogc); 3764 for (size_t i = 0; i < resultlen; ++i) { 3765 Latin1Char c = latin1Chars[i]; 3766 MOZ_ASSERT(staticStrings.hasUnit(c)); 3767 splits->initDenseElement(i, StringValue(staticStrings.getUnit(c))); 3768 } 3769 } else { 3770 splits->ensureDenseInitializedLength(0, resultlen); 3771 3772 for (size_t i = 0; i < resultlen; ++i) { 3773 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i); 3774 if (!sub) { 3775 return nullptr; 3776 } 3777 splits->initDenseElement(i, StringValue(sub)); 3778 } 3779 } 3780 3781 return splits; 3782 } 3783 3784 template <typename TextChar> 3785 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper( 3786 JSContext* cx, Handle<JSLinearString*> str, const TextChar* text, 3787 uint32_t textLen, char16_t patCh) { 3788 // Count the number of occurrences of patCh within text. 3789 uint32_t count = 0; 3790 for (size_t index = 0; index < textLen; index++) { 3791 if (static_cast<char16_t>(text[index]) == patCh) { 3792 count++; 3793 } 3794 } 3795 3796 // Handle zero-occurrence case - return input string in an array. 3797 if (count == 0) { 3798 return SingleElementStringArray(cx, str); 3799 } 3800 3801 // Create the result array for the substring values. 3802 Rooted<ArrayObject*> splits(cx, NewDenseFullyAllocatedArray(cx, count + 1)); 3803 if (!splits) { 3804 return nullptr; 3805 } 3806 splits->ensureDenseInitializedLength(0, count + 1); 3807 3808 // Add substrings. 
3809 uint32_t splitsIndex = 0; 3810 size_t lastEndIndex = 0; 3811 for (size_t index = 0; index < textLen; index++) { 3812 if (static_cast<char16_t>(text[index]) == patCh) { 3813 size_t subLength = size_t(index - lastEndIndex); 3814 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength); 3815 if (!sub) { 3816 return nullptr; 3817 } 3818 splits->initDenseElement(splitsIndex++, StringValue(sub)); 3819 lastEndIndex = index + 1; 3820 } 3821 } 3822 3823 // Add substring for tail of string (after last match). 3824 JSString* sub = 3825 NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex); 3826 if (!sub) { 3827 return nullptr; 3828 } 3829 splits->initDenseElement(splitsIndex++, StringValue(sub)); 3830 3831 return splits; 3832 } 3833 3834 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18. 3835 static ArrayObject* SplitSingleCharHelper(JSContext* cx, 3836 Handle<JSLinearString*> str, 3837 char16_t ch) { 3838 // Step 12. 3839 size_t strLength = str->length(); 3840 3841 AutoStableStringChars linearChars(cx); 3842 if (!linearChars.init(cx, str)) { 3843 return nullptr; 3844 } 3845 3846 if (linearChars.isLatin1()) { 3847 return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength, 3848 ch); 3849 } 3850 3851 return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength, 3852 ch); 3853 } 3854 3855 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18. 
// Splits |str| at |sep| and returns the resulting array, or nullptr on
// failure. |limit| must be strictly positive (asserted). Both strings are
// linearized, then one of three helpers is chosen based on the separator
// length and the limit.
ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
                                   HandleString sep, uint32_t limit) {
  MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");

  Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
  if (!linearStr) {
    return nullptr;
  }

  Rooted<JSLinearString*> linearSep(cx, sep->ensureLinear(cx));
  if (!linearSep) {
    return nullptr;
  }

  // Empty separator: split into individual code units.
  if (linearSep->length() == 0) {
    return CharSplitHelper(cx, linearStr, limit);
  }

  // Single-code-unit separator: the fast path takes no |limit| argument, so
  // it is only used when |limit| is at least INT32_MAX.
  // NOTE(review): presumably such a limit can never be reached by a real
  // string - confirm against the maximum string length.
  if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
    char16_t ch = linearSep->latin1OrTwoByteChar(0);
    return SplitSingleCharHelper(cx, linearStr, ch);
  }

  // General case.
  return SplitHelper(cx, linearStr, limit, linearSep);
}

// Function table referenced by StringObject::classSpec_ below. Entries are
// either native functions (JS_FN / JS_INLINABLE_FN) or self-hosted functions
// (JS_SELF_HOSTED_FN / JS_SELF_HOSTED_SYM_FN); "normalize" is only present
// when JS_HAS_INTL_API is set.
// NOTE(review): presumably these are the String.prototype methods - confirm
// against the ClassSpec field order.
static const JSFunctionSpec string_methods[] = {
    JS_FN("toSource", str_toSource, 0, 0),

    /* Java-like methods. */
    JS_INLINABLE_FN("toString", str_toString, 0, 0, StringToString),
    JS_INLINABLE_FN("valueOf", str_toString, 0, 0, StringValueOf),
    JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
    JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
    JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
    JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
    JS_INLINABLE_FN("codePointAt", str_codePointAt, 1, 0, StringCodePointAt),
    JS_INLINABLE_FN("at", str_at, 1, 0, StringAt),
    JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
    JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
    JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
    JS_INLINABLE_FN("includes", str_includes, 1, 0, StringIncludes),
    JS_INLINABLE_FN("indexOf", str_indexOf, 1, 0, StringIndexOf),
    JS_INLINABLE_FN("lastIndexOf", str_lastIndexOf, 1, 0, StringLastIndexOf),
    JS_INLINABLE_FN("startsWith", str_startsWith, 1, 0, StringStartsWith),
    JS_INLINABLE_FN("endsWith", str_endsWith, 1, 0, StringEndsWith),
    JS_INLINABLE_FN("trim", str_trim, 0, 0, StringTrim),
    JS_INLINABLE_FN("trimStart", str_trimStart, 0, 0, StringTrimStart),
    JS_INLINABLE_FN("trimEnd", str_trimEnd, 0, 0, StringTrimEnd),
    JS_INLINABLE_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0,
                    StringToLocaleLowerCase),
    JS_INLINABLE_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0,
                    StringToLocaleUpperCase),
    JS_FN("localeCompare", str_localeCompare, 1, 0),
    JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
#if JS_HAS_INTL_API
    JS_FN("normalize", str_normalize, 0, 0),
#endif

    /* Perl-ish methods (search is actually Python-esque). */
    JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
    JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
    JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
    JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
    JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
    JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
    JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),

    /* Python-esque sequence methods. */
    JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
    JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),

    /* HTML string methods. */
    JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
    JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
    JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
    JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
    JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
    JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
    JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
    JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
    JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
    JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
    JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
    JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
    JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),

    JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0),

    /* well-formed unicode strings */
    JS_FN("isWellFormed", str_isWellFormed, 0, 0),
    JS_FN("toWellFormed", str_toWellFormed, 0, 0),

    JS_FS_END,
};

// ES6 rev 27 (2014 Aug 24) 21.1.1
//
// Native implementation of the String constructor. With no arguments the
// value is the empty string; otherwise it is ToString of the first argument.
// When called as a plain function with a symbol argument, the symbol's
// descriptive string is returned instead. When constructing, the string is
// wrapped in a StringObject whose prototype comes from
// GetPrototypeFromBuiltinConstructor.
bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString str(cx);
  if (args.length() > 0) {
    // String(symbol) called as a function yields the descriptive string
    // rather than going through ToString.
    if (!args.isConstructing() && args[0].isSymbol()) {
      return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
    }

    str = ToString<CanGC>(cx, args[0]);
    if (!str) {
      return false;
    }
  } else {
    str = cx->runtime()->emptyString;
  }

  // |new String(...)|: box the primitive in a StringObject.
  if (args.isConstructing()) {
    RootedObject proto(cx);
    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
      return false;
    }

    StringObject* strobj = StringObject::create(cx, str, proto);
    if (!strobj) {
      return false;
    }
    args.rval().setObject(*strobj);
    return true;
  }

  // |String(...)|: return the primitive string.
  args.rval().setString(str);
  return true;
}

// Returns the one-code-unit string for |code|: the shared static string when
// one exists, otherwise a newly created inline string. May return nullptr if
// that creation fails.
static inline JSLinearString* CodeUnitToString(JSContext* cx, char16_t code) {
  if (StaticStrings::hasUnit(code)) {
    return cx->staticStrings().getUnit(code);
  }
  return NewInlineString<CanGC>(cx, {code}, 1);
}

// Returns the one-code-unit string for |charCode| after converting it to
// char16_t.
JSLinearString* js::StringFromCharCode(JSContext* cx, int32_t charCode) {
  return CodeUnitToString(cx, char16_t(charCode));
}

// Returns the string for |codePoint|, which must not exceed
// unicode::NonBMPMax (asserted). Supplementary code points are encoded as a
// lead/trail surrogate pair; all others as a single code unit.
JSLinearString* js::StringFromCodePoint(JSContext* cx, char32_t codePoint) {
  MOZ_ASSERT(codePoint <= unicode::NonBMPMax);

  if (!unicode::IsSupplementary(codePoint)) {
    return CodeUnitToString(cx, char16_t(codePoint));
  }

  char16_t chars[] = {unicode::LeadSurrogate(codePoint),
                      unicode::TrailSurrogate(codePoint)};
  return NewInlineString<CanGC>(cx, chars, 2);
}

// Inspect arguments to guess the output string type.
//
// Returns false only when one of the first |SampleSize| arguments is an int32
// whose uint16 value exceeds JSString::MAX_LATIN1_CHAR. Non-int32 arguments
// are not inspected, so a true result is a guess, not a guarantee.
static bool GuessFromCharCodeIsLatin1(const CallArgs& args) {
  // Arbitrarily chosen number of samples to inspect.
  constexpr unsigned SampleSize = 8;

  for (unsigned i = 0; i < std::min(args.length(), SampleSize); i++) {
    auto v = args[i];
    if (v.isInt32() && uint16_t(v.toInt32()) > JSString::MAX_LATIN1_CHAR) {
      return false;
    }
  }
  return true;
}

/**
 * 22.1.2.1 String.fromCharCode ( ...codeUnits )
 *
 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
 *
 * Each argument is converted with ToUint16, in argument order. The result is
 * first built as Latin-1 when the arguments look Latin-1, and switches to a
 * two-byte buffer as soon as a code unit above MAX_LATIN1_CHAR is seen.
 */
bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);

  // Optimize the single-char case.
  if (args.length() == 1) {
    uint16_t code;
    if (!ToUint16(cx, args[0], &code)) {
      return false;
    }

    JSString* str = CodeUnitToString(cx, char16_t(code));
    if (!str) {
      return false;
    }

    args.rval().setString(str);
    return true;
  }

  // Optimize the case where the result will be a Latin-1 string.
  StringChars<Latin1Char> latin1Chars(cx);

  // |i| counts the arguments already converted; it is shared with the
  // two-byte loop below so no argument is converted twice.
  unsigned i = 0;
  // The code unit that ended the Latin-1 attempt, or 0 if there was none.
  uint16_t firstTwoByteChar = 0;
  if (GuessFromCharCodeIsLatin1(args)) {
    if (!latin1Chars.maybeAlloc(cx, args.length())) {
      return false;
    }

    for (; i < args.length(); i++) {
      uint16_t code;
      if (!ToUint16(cx, args[i], &code)) {
        return false;
      }

      // The guess was wrong: remember this code unit and fall through to the
      // two-byte path.
      if (code > JSString::MAX_LATIN1_CHAR) {
        firstTwoByteChar = code;
        break;
      }

      AutoCheckCannotGC nogc;
      latin1Chars.data(nogc)[i] = code;
    }

    // Every argument fit into Latin-1.
    if (i == args.length()) {
      JSString* str = latin1Chars.toStringDontDeflate<CanGC>(cx, args.length());
      if (!str) {
        return false;
      }

      args.rval().setString(str);
      return true;
    }
  }

  StringChars<char16_t> twoByteChars(cx);
  if (!twoByteChars.maybeAlloc(cx, args.length())) {
    return false;
  }

  // Copy the already processed characters.
  if (i > 0) {
    AutoCheckCannotGC nogc;
    std::copy_n(latin1Chars.data(nogc), i, twoByteChars.data(nogc));
  }

  // Copy the first two-byte character, if present.
  if (firstTwoByteChar > 0) {
    MOZ_ASSERT(firstTwoByteChar > JSString::MAX_LATIN1_CHAR);

    AutoCheckCannotGC nogc;
    twoByteChars.data(nogc)[i++] = char16_t(firstTwoByteChar);
  }

  // Convert the remaining arguments directly into the two-byte buffer.
  for (; i < args.length(); i++) {
    uint16_t code;
    if (!ToUint16(cx, args[i], &code)) {
      return false;
    }

    AutoCheckCannotGC nogc;
    twoByteChars.data(nogc)[i] = code;
  }

  JSString* str = twoByteChars.toStringDontDeflate<CanGC>(cx, args.length());
  if (!str) {
    return false;
  }

  args.rval().setString(str);
  return true;
}

/**
 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
 *
 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
 *
 * Converts |code| to a code point and stores it in |*codePoint|. Returns
 * false if ToNumber fails, or after reporting JSMSG_NOT_A_CODEPOINT when the
 * number is not an integer in the range [0, unicode::NonBMPMax].
 */
static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
                                          char32_t* codePoint) {
  // String.fromCodePoint, Steps 2.a-d.

  // Fast path for the common case - the input is already an int32.
  if (code.isInt32()) {
    // Step 2.a.
    int32_t nextCP = code.toInt32();

    // Steps 2.b-d.
    // The unsigned comparison also rejects negative values; those fall
    // through to the slow path below, which reports the error.
    if (MOZ_LIKELY(uint32_t(nextCP) <= unicode::NonBMPMax)) {
      *codePoint = char32_t(nextCP);
      return true;
    }
  }

  // Step 2.a.
  double nextCP;
  if (!ToNumber(cx, code, &nextCP)) {
    return false;
  }

  // Steps 2.b-c.
  if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
      nextCP > unicode::NonBMPMax) {
    ToCStringBuf cbuf;
    const char* numStr = NumberToCString(&cbuf, nextCP);
    MOZ_ASSERT(numStr);
    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                              JSMSG_NOT_A_CODEPOINT, numStr);
    return false;
  }

  // Steps 2.d.
  *codePoint = char32_t(nextCP);
  return true;
}

/**
 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
 *
 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
 *
 * Variant for at most JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2 arguments
 * (asserted): the code units are collected in a stack buffer of
 * MAX_LENGTH_TWO_BYTE elements, which leaves room for two code units per
 * argument.
 */
static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
  MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);

  // Step 1.
  char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];

  // Step 2.
  unsigned length = 0;
  for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
    // Steps 2.a-c.
    char32_t codePoint;
    if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
      return false;
    }

    // Step 2.d.
    unicode::UTF16Encode(codePoint, elements, &length);
  }

  // Steps 3-4.
  JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
  if (!str) {
    return false;
  }

  args.rval().setString(str);
  return true;
}

/**
 * 22.1.2.2 String.fromCodePoint ( ...codePoints )
 *
 * ES2024 draft rev 7d2644968bd56d54d2886c012d18698ff3f72c35
 *
 * Native entry point. Has dedicated paths for a single argument and for few
 * arguments; otherwise a buffer of |args.length() * 2| code units is
 * allocated and filled.
 */
bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Optimize the single code-point case.
  if (args.length() == 1) {
    // Step 1. (Omitted)

    // Step 2.
    char32_t codePoint;
    if (!ToCodePoint(cx, args[0], &codePoint)) {
      return false;
    }

    // Steps 3-4.
    JSString* str = StringFromCodePoint(cx, codePoint);
    if (!str) {
      return false;
    }

    args.rval().setString(str);
    return true;
  }

  // Optimize the case where the result will definitely fit in an inline
  // string (thin or fat) and so we don't need to malloc the chars. (We could
  // cover some cases where |args.length()| goes up to
  // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
  // are all Latin-1, but it doesn't seem worth the effort.)
  if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
    return str_fromCodePoint_few_args(cx, args);
  }

  // Step 1.
  static_assert(
      ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
      "|args.length() * 2| does not overflow");
  auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
                                                     args.length() * 2);
  if (!elements) {
    return false;
  }

  // Steps 2.
  unsigned length = 0;
  for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
    // Steps 2.a-c.
    char32_t codePoint;
    if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
      return false;
    }

    // Step 2.d.
    unicode::UTF16Encode(codePoint, elements.get(), &length);
  }

  // Steps 3-4.
  // The buffer is moved into NewString.
  JSString* str = NewString<CanGC>(cx, std::move(elements), length);
  if (!str) {
    return false;
  }

  args.rval().setString(str);
  return true;
}

// Function table referenced by StringObject::classSpec_ below.
// NOTE(review): presumably the static methods of the String constructor -
// confirm against the ClassSpec field order.
static const JSFunctionSpec string_static_methods[] = {
    JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
                    StringFromCharCode),
    JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
                    StringFromCodePoint),

    JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0),
    JS_FS_END,
};

// Adds the "length" property in the reserved LENGTH_SLOT of the (asserted
// empty) |obj| and returns the object's resulting shared shape, or nullptr
// if adding the property fails.
/* static */
SharedShape* StringObject::assignInitialShape(JSContext* cx,
                                              Handle<StringObject*> obj) {
  MOZ_ASSERT(obj->empty());

  if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
                                               LENGTH_SLOT, {})) {
    return nullptr;
  }

  return obj->sharedShape();
}

// Creates the prototype object: a blank StringObject prototype initialized
// with the empty string. Returns nullptr on failure.
JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
  Rooted<JSString*> empty(cx, cx->runtime()->emptyString);

  // Because the `length` property of a StringObject is both non-configurable
  // and non-writable, we need to take the slow path of proxy result
  // validation for them, and so we need to ensure that the initial ObjectFlags
  // reflect that. Normally this would be handled for us, but the special
  // SharedShape::ensureInitialCustomShape path which ultimately takes us
  // through StringObject::assignInitialShape which adds the problematic
  // property sneaks past our flag setting logic and results in a failed
  // lookup of the initial shape in SharedShape::insertInitialShape.
  Rooted<StringObject*> proto(
      cx, GlobalObject::createBlankPrototype<StringObject>(
              cx, cx->global(),
              ObjectFlags({ObjectFlag::NeedsProxyGetSetResultValidation})));
  if (!proto) {
    return nullptr;
  }
  if (!StringObject::init(cx, proto, empty)) {
    return nullptr;
  }
  return proto;
}

// Finish hook referenced by StringObject::classSpec_: defines the
// "trimLeft"/"trimRight" aliases on |proto| and installs |string_functions|
// on the global object. |ctor| is not used here.
static bool StringClassFinish(JSContext* cx, HandleObject ctor,
                              HandleObject proto) {
  Handle<NativeObject*> nativeProto = proto.as<NativeObject>();

  // Create "trimLeft" as an alias for "trimStart".
  // The alias is defined with the value read from the existing property.
  RootedValue trimFn(cx);
  RootedId trimId(cx, NameToId(cx->names().trimStart));
  RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
  if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
      !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
    return false;
  }

  // Create "trimRight" as an alias for "trimEnd".
  trimId = NameToId(cx->names().trimEnd);
  trimAliasId = NameToId(cx->names().trimRight);
  if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
      !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
    return false;
  }

  /*
   * Define escape/unescape, the URI encode/decode functions, and maybe
   * uneval on the global object.
   */
  if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
    return false;
  }

  return true;
}

// Class specification for String: ties together the constructor, the
// prototype factory, the two function tables and the finish hook defined
// above.
const ClassSpec StringObject::classSpec_ = {
    GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
                             &jit::JitInfo_String>,
    StringObject::createPrototype,
    string_static_methods,
    nullptr,
    string_methods,
    nullptr,
    StringClassFinish,
};

// Shorthand for |false| so that the |true| entries stand out in the two
// lookup tables below; undefined again right after them.
#define ____ false

/*
 * Uri reserved chars + #:
 * - 35: #
 * - 36: $
 * - 38: &
 * - 43: +
 * - 44: ,
 * - 47: /
 * - 58: :
 * - 59: ;
 * - 61: =
 * - 63: ?
 * - 64: @
 *
 * The table has 128 entries, indexed by character code (row label * 10 +
 * column).
 */
static const bool js_isUriReservedPlusPound[] = {
    // clang-format off
/*       0     1     2     3     4     5     6     7     8     9  */
/*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
/*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
/*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
/*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
/*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
    // clang-format on
};

/*
 * Uri unescaped chars:
 * - 33: !
 * - 39: '
 * - 40: (
 * - 41: )
 * - 42: *
 * - 45: -
 * - 46: .
 * - 48..57: 0-9
 * - 65..90: A-Z
 * - 95: _
 * - 97..122: a-z
 * - 126: ~
 *
 * The table has 128 entries, indexed by character code (row label * 10 +
 * column).
 */
static const bool js_isUriUnescaped[] = {
    // clang-format off
/*       0     1     2     3     4     5     6     7     8     9  */
/*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
/*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
/*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
/*  5 */ true, true, true, true, true, true, true, true, ____, ____,
/*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
/*  7 */ true, true, true, true, true, true, true, true, true, true,
/*  8 */ true, true, true, true, true, true, true, true, true, true,
/*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
/* 10 */ true, true, true, true, true, true, true, true, true, true,
/* 11 */ true, true, true, true, true, true, true, true, true, true,
/* 12 */ true, true, true, ____, ____, ____, true, ____
    // clang-format on
};

#undef ____

// Stores the result string in |rval|: the builder's finished string when
// |sb| is non-empty, otherwise |str| unchanged. Returns false only if
// finishing the builder fails.
static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
                                          MutableHandleValue rval) {
  if (!sb.empty()) {
    str = sb.finishString();
    if (!str) {
      return false;
    }
  }
  rval.setString(str);
  return true;
}

/*
 * ECMA 3, 15.1.3 URI Handling Function Properties
 *
 * The following are implementations of the algorithms
 * given in the ECMA specification for the hidden functions
 * 'Encode' and 'Decode'.
 */
// Outcome codes for the Encode functions.
// NOTE(review): presumably Encode_BadUri marks malformed input and
// Encode_Failure any other error - confirm at the use sites.
enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };

// Bug 1403318: GCC sometimes inlines this Encode function rather than the
// caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
4456 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior. 4457 template <typename CharT> 4458 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuilder& sb, 4459 const CharT* chars, size_t length, 4460 const bool* unescapedSet) { 4461 Latin1Char hexBuf[3]; 4462 hexBuf[0] = '%'; 4463 4464 auto appendEncoded = [&sb, &hexBuf](Latin1Char c) { 4465 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */ 4466 4467 hexBuf[1] = HexDigits[c >> 4]; 4468 hexBuf[2] = HexDigits[c & 0xf]; 4469 return sb.append(hexBuf, 3); 4470 }; 4471 4472 auto appendRange = [&sb, chars, length](size_t start, size_t end) { 4473 MOZ_ASSERT(start <= end); 4474 4475 if (start < end) { 4476 if (start == 0) { 4477 if (!sb.reserve(length)) { 4478 return false; 4479 } 4480 } 4481 return sb.append(chars + start, chars + end); 4482 } 4483 return true; 4484 }; 4485 4486 size_t startAppend = 0; 4487 for (size_t k = 0; k < length; k++) { 4488 CharT c = chars[k]; 4489 if (c < 128 && 4490 (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) { 4491 continue; 4492 } else { 4493 if (!appendRange(startAppend, k)) { 4494 return Encode_Failure; 4495 } 4496 4497 if constexpr (std::is_same_v<CharT, Latin1Char>) { 4498 if (c < 0x80) { 4499 if (!appendEncoded(c)) { 4500 return Encode_Failure; 4501 } 4502 } else { 4503 if (!appendEncoded(0xC0 | (c >> 6)) || 4504 !appendEncoded(0x80 | (c & 0x3F))) { 4505 return Encode_Failure; 4506 } 4507 } 4508 } else { 4509 if (unicode::IsTrailSurrogate(c)) { 4510 return Encode_BadUri; 4511 } 4512 4513 char32_t v; 4514 if (!unicode::IsLeadSurrogate(c)) { 4515 v = c; 4516 } else { 4517 k++; 4518 if (k == length) { 4519 return Encode_BadUri; 4520 } 4521 4522 char16_t c2 = chars[k]; 4523 if (!unicode::IsTrailSurrogate(c2)) { 4524 return Encode_BadUri; 4525 } 4526 4527 v = unicode::UTF16Decode(c, c2); 4528 } 4529 4530 uint8_t utf8buf[4]; 4531 size_t L = OneUcs4ToUtf8Char(utf8buf, v); 4532 for (size_t j = 0; j < L; j++) { 4533 if 
(!appendEncoded(utf8buf[j])) { 4534 return Encode_Failure; 4535 } 4536 } 4537 } 4538 4539 startAppend = k + 1; 4540 } 4541 } 4542 4543 if (startAppend > 0) { 4544 if (!appendRange(startAppend, length)) { 4545 return Encode_Failure; 4546 } 4547 } 4548 4549 return Encode_Success; 4550 } 4551 4552 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, Handle<JSLinearString*> str, 4553 const bool* unescapedSet, 4554 MutableHandleValue rval) { 4555 size_t length = str->length(); 4556 if (length == 0) { 4557 rval.setString(cx->runtime()->emptyString); 4558 return true; 4559 } 4560 4561 JSStringBuilder sb(cx); 4562 4563 EncodeResult res; 4564 if (str->hasLatin1Chars()) { 4565 AutoCheckCannotGC nogc; 4566 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet); 4567 } else { 4568 AutoCheckCannotGC nogc; 4569 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet); 4570 } 4571 4572 if (res == Encode_Failure) { 4573 return false; 4574 } 4575 4576 if (res == Encode_BadUri) { 4577 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI); 4578 return false; 4579 } 4580 4581 MOZ_ASSERT(res == Encode_Success); 4582 return TransferBufferToString(sb, str, rval); 4583 } 4584 4585 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success }; 4586 4587 template <typename CharT> 4588 static DecodeResult Decode(StringBuilder& sb, const CharT* chars, size_t length, 4589 const bool* reservedSet) { 4590 auto appendRange = [&sb, chars](size_t start, size_t end) { 4591 MOZ_ASSERT(start <= end); 4592 4593 if (start < end) { 4594 return sb.append(chars + start, chars + end); 4595 } 4596 return true; 4597 }; 4598 4599 size_t startAppend = 0; 4600 for (size_t k = 0; k < length; k++) { 4601 CharT c = chars[k]; 4602 if (c == '%') { 4603 size_t start = k; 4604 if ((k + 2) >= length) { 4605 return Decode_BadUri; 4606 } 4607 4608 if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) { 4609 return Decode_BadUri; 4610 } 4611 4612 uint32_t B 
= AsciiAlphanumericToNumber(chars[k + 1]) * 16 + 4613 AsciiAlphanumericToNumber(chars[k + 2]); 4614 k += 2; 4615 if (B < 128) { 4616 Latin1Char ch = Latin1Char(B); 4617 if (reservedSet && reservedSet[ch]) { 4618 continue; 4619 } 4620 4621 if (!appendRange(startAppend, start)) { 4622 return Decode_Failure; 4623 } 4624 if (!sb.append(ch)) { 4625 return Decode_Failure; 4626 } 4627 } else { 4628 int n = 1; 4629 while (B & (0x80 >> n)) { 4630 n++; 4631 } 4632 4633 if (n == 1 || n > 4) { 4634 return Decode_BadUri; 4635 } 4636 4637 uint8_t octets[4]; 4638 octets[0] = (uint8_t)B; 4639 if (k + 3 * (n - 1) >= length) { 4640 return Decode_BadUri; 4641 } 4642 4643 for (int j = 1; j < n; j++) { 4644 k++; 4645 if (chars[k] != '%') { 4646 return Decode_BadUri; 4647 } 4648 4649 if (!IsAsciiHexDigit(chars[k + 1]) || 4650 !IsAsciiHexDigit(chars[k + 2])) { 4651 return Decode_BadUri; 4652 } 4653 4654 B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 + 4655 AsciiAlphanumericToNumber(chars[k + 2]); 4656 if ((B & 0xC0) != 0x80) { 4657 return Decode_BadUri; 4658 } 4659 4660 k += 2; 4661 octets[j] = char(B); 4662 } 4663 4664 if (!appendRange(startAppend, start)) { 4665 return Decode_Failure; 4666 } 4667 4668 char32_t v = JS::Utf8ToOneUcs4Char(octets, n); 4669 MOZ_ASSERT(v >= 128); 4670 if (v >= unicode::NonBMPMin) { 4671 if (v > unicode::NonBMPMax) { 4672 return Decode_BadUri; 4673 } 4674 4675 if (!sb.append(unicode::LeadSurrogate(v))) { 4676 return Decode_Failure; 4677 } 4678 if (!sb.append(unicode::TrailSurrogate(v))) { 4679 return Decode_Failure; 4680 } 4681 } else { 4682 if (!sb.append(char16_t(v))) { 4683 return Decode_Failure; 4684 } 4685 } 4686 } 4687 4688 startAppend = k + 1; 4689 } 4690 } 4691 4692 if (startAppend > 0) { 4693 if (!appendRange(startAppend, length)) { 4694 return Decode_Failure; 4695 } 4696 } 4697 4698 return Decode_Success; 4699 } 4700 4701 static bool Decode(JSContext* cx, Handle<JSLinearString*> str, 4702 const bool* reservedSet, MutableHandleValue rval) { 4703 
size_t length = str->length(); 4704 if (length == 0) { 4705 rval.setString(cx->runtime()->emptyString); 4706 return true; 4707 } 4708 4709 JSStringBuilder sb(cx); 4710 4711 DecodeResult res; 4712 if (str->hasLatin1Chars()) { 4713 AutoCheckCannotGC nogc; 4714 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet); 4715 } else { 4716 AutoCheckCannotGC nogc; 4717 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet); 4718 } 4719 4720 if (res == Decode_Failure) { 4721 return false; 4722 } 4723 4724 if (res == Decode_BadUri) { 4725 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI); 4726 return false; 4727 } 4728 4729 MOZ_ASSERT(res == Decode_Success); 4730 return TransferBufferToString(sb, str, rval); 4731 } 4732 4733 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) { 4734 AutoJSMethodProfilerEntry pseudoFrame(cx, "decodeURI"); 4735 CallArgs args = CallArgsFromVp(argc, vp); 4736 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 4737 if (!str) { 4738 return false; 4739 } 4740 4741 return Decode(cx, str, js_isUriReservedPlusPound, args.rval()); 4742 } 4743 4744 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) { 4745 AutoJSMethodProfilerEntry pseudoFrame(cx, "decodeURIComponent"); 4746 CallArgs args = CallArgsFromVp(argc, vp); 4747 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 4748 if (!str) { 4749 return false; 4750 } 4751 4752 return Decode(cx, str, nullptr, args.rval()); 4753 } 4754 4755 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) { 4756 AutoJSMethodProfilerEntry pseudoFrame(cx, "encodeURI"); 4757 CallArgs args = CallArgsFromVp(argc, vp); 4758 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 4759 if (!str) { 4760 return false; 4761 } 4762 4763 return Encode(cx, str, js_isUriReservedPlusPound, args.rval()); 4764 } 4765 4766 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) { 
4767 AutoJSMethodProfilerEntry pseudoFrame(cx, "encodeURIComponent"); 4768 CallArgs args = CallArgsFromVp(argc, vp); 4769 Rooted<JSLinearString*> str(cx, ArgToLinearString(cx, args, 0)); 4770 if (!str) { 4771 return false; 4772 } 4773 4774 return Encode(cx, str, nullptr, args.rval()); 4775 } 4776 4777 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) { 4778 JSStringBuilder sb(cx); 4779 EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars), 4780 length, js_isUriReservedPlusPound); 4781 if (result == EncodeResult::Encode_Failure) { 4782 return nullptr; 4783 } 4784 if (result == EncodeResult::Encode_BadUri) { 4785 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI); 4786 return nullptr; 4787 } 4788 if (sb.empty()) { 4789 return NewStringCopyN<CanGC>(cx, chars, length); 4790 } 4791 return sb.finishString(); 4792 } 4793 4794 static bool FlatStringMatchHelper(JSContext* cx, JSString* str, 4795 JSString* pattern, bool* isFlat, 4796 int32_t* match) { 4797 JSLinearString* linearPattern = pattern->ensureLinear(cx); 4798 if (!linearPattern) { 4799 return false; 4800 } 4801 4802 static const size_t MAX_FLAT_PAT_LEN = 256; 4803 if (linearPattern->length() > MAX_FLAT_PAT_LEN || 4804 StringHasRegExpMetaChars(linearPattern)) { 4805 *isFlat = false; 4806 return true; 4807 } 4808 4809 *isFlat = true; 4810 if (str->isRope()) { 4811 if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) { 4812 return false; 4813 } 4814 } else { 4815 *match = StringMatch(&str->asLinear(), linearPattern); 4816 } 4817 4818 return true; 4819 } 4820 4821 static bool BuildFlatMatchArray(JSContext* cx, HandleString str, 4822 HandleString pattern, int32_t match, 4823 MutableHandleValue rval) { 4824 if (match < 0) { 4825 rval.setNull(); 4826 return true; 4827 } 4828 4829 // Get the shape for the match result object. 
4830 Rooted<SharedShape*> shape( 4831 cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(cx)); 4832 if (!shape) { 4833 return false; 4834 } 4835 4836 Rooted<ArrayObject*> arr(cx, 4837 NewDenseFullyAllocatedArrayWithShape(cx, 1, shape)); 4838 if (!arr) { 4839 return false; 4840 } 4841 4842 // Store a Value for each pair. 4843 arr->setDenseInitializedLength(1); 4844 arr->initDenseElement(0, StringValue(pattern)); 4845 4846 // Set the |index| property. 4847 arr->initSlot(RegExpRealm::MatchResultObjectIndexSlot, Int32Value(match)); 4848 4849 // Set the |input| property. 4850 arr->initSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(str)); 4851 4852 #ifdef DEBUG 4853 RootedValue test(cx); 4854 RootedId id(cx, NameToId(cx->names().index)); 4855 if (!NativeGetProperty(cx, arr, id, &test)) { 4856 return false; 4857 } 4858 MOZ_ASSERT(test == arr->getSlot(0)); 4859 id = NameToId(cx->names().input); 4860 if (!NativeGetProperty(cx, arr, id, &test)) { 4861 return false; 4862 } 4863 MOZ_ASSERT(test == arr->getSlot(1)); 4864 #endif 4865 4866 rval.setObject(*arr); 4867 return true; 4868 } 4869 4870 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) { 4871 CallArgs args = CallArgsFromVp(argc, vp); 4872 MOZ_ASSERT(args.length() == 2); 4873 MOZ_ASSERT(args[0].isString()); 4874 MOZ_ASSERT(args[1].isString()); 4875 MOZ_ASSERT(cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact()); 4876 4877 RootedString str(cx, args[0].toString()); 4878 RootedString pattern(cx, args[1].toString()); 4879 4880 bool isFlat = false; 4881 int32_t match = 0; 4882 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) { 4883 return false; 4884 } 4885 4886 if (!isFlat) { 4887 args.rval().setUndefined(); 4888 return true; 4889 } 4890 4891 return BuildFlatMatchArray(cx, str, pattern, match, args.rval()); 4892 } 4893 4894 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) { 4895 CallArgs args = CallArgsFromVp(argc, vp); 4896 MOZ_ASSERT(args.length() 
== 2); 4897 MOZ_ASSERT(args[0].isString()); 4898 MOZ_ASSERT(args[1].isString()); 4899 MOZ_ASSERT(cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact()); 4900 4901 JSString* str = args[0].toString(); 4902 JSString* pattern = args[1].toString(); 4903 4904 bool isFlat = false; 4905 int32_t match = 0; 4906 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) { 4907 return false; 4908 } 4909 4910 if (!isFlat) { 4911 args.rval().setInt32(-2); 4912 return true; 4913 } 4914 4915 args.rval().setInt32(match); 4916 return true; 4917 }