StringType.h (93463B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef vm_StringType_h
#define vm_StringType_h

#include "mozilla/Maybe.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/Range.h"
#include "mozilla/RefPtr.h"
#include "mozilla/Span.h"
#include "mozilla/StringBuffer.h"
#include "mozilla/TextUtils.h"

#include <string_view>  // std::basic_string_view

#include "jstypes.h"  // js::Bit

#include "gc/Cell.h"
#include "gc/MaybeRooted.h"
#include "gc/Nursery.h"
#include "gc/RelocationOverlay.h"
#include "gc/StoreBuffer.h"
#include "js/CharacterEncoding.h"
#include "js/RootingAPI.h"
#include "js/shadow/String.h"  // JS::shadow::String
#include "js/String.h"         // JS::MaxStringLength
#include "js/UniquePtr.h"
#include "util/Text.h"

class JSDependentString;
class JSExtensibleString;
class JSExternalString;
class JSInlineString;
class JSRope;

namespace JS {
class JS_PUBLIC_API AutoStableStringChars;
}  // namespace JS

namespace js {

class ArrayObject;
class JS_PUBLIC_API GenericPrinter;
class JSONPrinter;
class PropertyName;
class StringBuilder;
class JSOffThreadAtom;

namespace frontend {
class ParserAtomsTable;
class TaggedParserAtomIndex;
class WellKnownParserAtoms;
struct CompilationAtomCache;
}  // namespace frontend

namespace jit {
class MacroAssembler;
}  // namespace jit

/*
 * The buffer length required to contain any unsigned 32-bit integer.
 *
 * This is the number of decimal digits in UINT32_MAX (10); the "- 1" drops the
 * string literal's terminating null character, so the value does not include
 * room for a terminator.
 */
static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;

// Maximum array index. This value is defined in the spec (ES2021 draft, 6.1.7):
//
//   An array index is an integer index whose numeric value i is in the range
//   +0𝔽 ≤ i < 𝔽(2^32 - 1).
const uint32_t MAX_ARRAY_INDEX = 4294967294u;  // 2^32-2 (= UINT32_MAX-1)

// Returns true if the characters of `s` store an unsigned 32-bit integer value
// less than or equal to MAX_ARRAY_INDEX, initializing `*indexp` to that value
// if so. Leading '0' isn't allowed except 0 itself.
template <typename CharT>
bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp);

} /* namespace js */

// clang-format off
/*
 * [SMDOC] JavaScript Strings
 *
 * Conceptually, a JS string is just an array of chars and a length. This array
 * of chars may or may not be null-terminated and, if it is, the null character
 * is not included in the length.
 *
 * To improve performance of common operations, the following optimizations are
 * made which affect the engine's representation of strings:
 *
 *  - The plain vanilla representation is a "linear" string which consists of a
 *    string header in the GC heap and a malloc'd char array.
 *
 *  - To avoid copying a substring of an existing "base" string, a "dependent"
 *    string (JSDependentString) can be created which points into the base
 *    string's char array.
 *
 *  - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
 *    to represent a delayed string concatenation. Concatenation (called
 *    flattening) is performed if and when a linear char array is requested. In
 *    general, ropes form a binary dag whose internal nodes are JSRope string
 *    headers with no associated char array and whose leaf nodes are linear
 *    strings.
 *
 *  - To avoid copying the leftmost string when flattening, we may produce an
 *    "extensible" string, which tracks not only its actual length but also its
 *    buffer's overall size.
If such an "extensible" string appears as the
 *    leftmost string in a subsequent flatten, and its buffer has enough unused
 *    space, we can simply flatten the rest of the ropes into its buffer,
 *    leaving its text in place. We then transfer ownership of its buffer to the
 *    flattened rope, and mutate the donor extensible string into a dependent
 *    string referencing its original buffer.
 *
 *    (The term "extensible" does not imply that we ever 'realloc' the buffer.
 *    Extensible strings may have dependent strings pointing into them, and the
 *    JSAPI hands out pointers to linear strings' buffers, so resizing with
 *    'realloc' is generally not possible.)
 *
 *  - To avoid allocating small char arrays, short strings can be stored inline
 *    in the string header (JSInlineString). These come in two flavours:
 *    JSThinInlineString, which is the same size as JSString; and
 *    JSFatInlineString, which has a larger header and so can fit more chars.
 *
 *  - To avoid O(n) string equality comparison, strings can be canonicalized to
 *    "atoms" (JSAtom) such that there is a single atom with a given
 *    (length,chars).
 *
 *  - To avoid copying all strings created through the JSAPI, an "external"
 *    string (JSExternalString) can be created whose chars are managed by the
 *    JSAPI client.
 *
 *  - To avoid using two bytes per character for every string, string
 *    characters are stored as Latin1 instead of TwoByte if all characters are
 *    representable in Latin1.
 *
 *  - To avoid slow conversions from strings to integer indexes, we cache 16 bit
 *    unsigned indexes on strings representing such numbers.
 *
 * Although all strings share the same basic memory layout, we can conceptually
 * arrange them into a hierarchy of operations/invariants and represent this
 * hierarchy in C++ with classes:
 *
 * C++ type                     operations+fields / invariants+properties
 * ==========================   =========================================
 * JSString (abstract)          get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
 *  | \
 *  | JSRope                    leftChild, rightChild / -
 *  |
 * JSLinearString               latin1Chars, twoByteChars / -
 *  |
 *  +-- JSDependentString       base / -
 *  |   |
 *  |   +-- JSAtomRefString     - / base points to an atom
 *  |
 *  +-- JSExternalString        - / char array memory managed by embedding
 *  |
 *  +-- JSExtensibleString      - / tracks total buffer capacity (including current text)
 *  |
 *  +-- JSInlineString (abstract) - / chars stored in header
 *  |   |
 *  |   +-- JSThinInlineString  - / header is normal
 *  |   |
 *  |   +-- JSFatInlineString   - / header is fat
 *  |
 * JSAtom (abstract)            - / string equality === pointer equality
 *  |  |
 *  |  +-- js::NormalAtom       JSLinearString + atom hash code / -
 *  |  |   |
 *  |  |   +-- js::ThinInlineAtom
 *  |  |                        possibly larger JSThinInlineString + atom hash code / -
 *  |  |
 *  |  +-- js::FatInlineAtom    JSFatInlineString w/atom hash code / -
 *  |
 * js::PropertyName             - / chars don't contain an index (uint32_t)
 *
 * Classes marked with (abstract) above are not literally C++ Abstract Base
 * Classes (since there are no virtual functions, pure or not, in this
 * hierarchy), but have the same meaning: there are no strings with this type as
 * its most-derived type.
 *
 * Atoms can additionally be permanent, i.e. unable to be collected, and can
 * be combined with other string types to create additional most-derived types
 * that satisfy the invariants of more than one of the abovementioned
 * most-derived types.
Furthermore, each atom stores a hash number (based on its
 * chars). This hash number is used as key in the atoms table and when the atom
 * is used as key in a JS Map/Set.
 *
 * Derived string types can be queried from ancestor types via isX() and
 * retrieved with asX() debug-only-checked casts.
 *
 * The ensureX() operations mutate 'this' in place to effectively make the type
 * be at least X (e.g., ensureLinear will change a JSRope to be a JSLinearString).
 */
// clang-format on

class JSString : public js::gc::CellWithLengthAndFlags {
 protected:
  using Base = js::gc::CellWithLengthAndFlags;

  // Number of characters that fit in the inline storage of a plain JSString:
  // the storage is two pointer-sized words (see Data below).
  static const size_t NUM_INLINE_CHARS_LATIN1 =
      2 * sizeof(void*) / sizeof(JS::Latin1Char);
  static const size_t NUM_INLINE_CHARS_TWO_BYTE =
      2 * sizeof(void*) / sizeof(char16_t);

 public:
  // String length and flags are stored in the cell header.
  MOZ_ALWAYS_INLINE
  size_t length() const { return headerLengthField(); }
  MOZ_ALWAYS_INLINE
  uint32_t flags() const { return headerFlagsField(); }

  // Class for temporarily holding character data that will be used for JSString
  // contents. The data may be allocated in the nursery, the malloc heap, or as
  // a StringBuffer. The class instance must be passed to the JSString
  // constructor as a MutableHandle, so that if a GC occurs between the
  // construction of the content and the construction of the JSString Cell to
  // hold it, the contents can be transparently moved to the malloc heap before
  // the nursery is reset.
  template <typename CharT>
  class OwnedChars {
   public:
    enum class Kind {
      // Not owning any chars. chars_ should not be used.
      Uninitialized,

      // chars_ is a buffer allocated in the nursery.
      Nursery,

      // chars_ is a buffer allocated in the malloc heap. This pointer should be
      // passed to js_free() if OwnedChars dies while still possessing
      // ownership.
      Malloc,

      // chars_ is allocated as a refcounted StringBuffer. The reference must be
      // released if OwnedChars dies while still possessing ownership.
      StringBuffer,
    };

   private:
    mozilla::Span<CharT> chars_;
    Kind kind_ = Kind::Uninitialized;

   public:
    // Move-only: copying is deleted. The destructor calls reset().
    OwnedChars() = default;
    OwnedChars(CharT* chars, size_t length, Kind kind);
    OwnedChars(js::UniquePtr<CharT[], JS::FreePolicy>&& chars, size_t length);
    OwnedChars(RefPtr<mozilla::StringBuffer>&& buffer, size_t length);
    OwnedChars(OwnedChars&&);
    OwnedChars(const OwnedChars&) = delete;
    ~OwnedChars() { reset(); }

    OwnedChars& operator=(OwnedChars&&);
    OwnedChars& operator=(const OwnedChars&) = delete;

    // True for every kind other than Kind::Uninitialized. Debug builds assert
    // that an initialized instance has a non-empty span.
    explicit operator bool() const {
      MOZ_ASSERT_IF(kind_ != Kind::Uninitialized, !chars_.empty());
      return kind_ != Kind::Uninitialized;
    }
    // The accessors below assert (in debug builds) that this is initialized.
    mozilla::Span<CharT> span() const {
      MOZ_ASSERT(kind_ != Kind::Uninitialized);
      return chars_;
    }
    CharT* data() const {
      MOZ_ASSERT(kind_ != Kind::Uninitialized);
      return chars_.data();
    }
    // Number of CharT elements held.
    size_t length() const {
      MOZ_ASSERT(kind_ != Kind::Uninitialized);
      return chars_.Length();
    }
    // Size of the held characters in bytes (length() * sizeof(CharT)).
    size_t size() const { return length() * sizeof(CharT); }
    bool isMalloced() const { return kind_ == Kind::Malloc; }
    bool hasStringBuffer() const { return kind_ == Kind::StringBuffer; }

    // Return the data and release ownership to the caller.
    inline CharT* release();
    // Discard any owned data.
    inline void reset();
    // Move any nursery data into the malloc heap.
    inline void ensureNonNursery();

    // If we GC with a live OwnedChars, copy the data out of the nursery to a
    // safely malloced location.
    void trace(JSTracer* trc) { ensureNonNursery(); }
  };

 protected:
  /* Fields only apply to string types commented on the right. */
  struct Data {
    // Note: 32-bit length and flags fields are inherited from
    // CellWithLengthAndFlags.

    union {
      union {
        /* JS(Fat)InlineString */
        JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
        char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
      };
      struct {
        union {
          const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except
                                                         JS(Fat)InlineString */
          const char16_t* nonInlineCharsTwoByte;      /* JSLinearString, except
                                                         JS(Fat)InlineString */
          JSString* left;                             /* JSRope */
          JSRope* parent;                             /* Used in flattening */
        } u2;
        union {
          JSLinearString* base; /* JSDependentString */
          JSAtom* atom;         /* JSAtomRefString */
          JSString* right;      /* JSRope */
          size_t capacity;      /* JSLinearString (extensible) */
          const JSExternalStringCallbacks*
              externalCallbacks; /* JSExternalString */
        } u3;
      } s;
    };
  } d;

 public:
  /* Flags exposed only for jits */

  /*
   * Flag Encoding
   *
   * The first word of a JSString stores flags, index, and (on some
   * platforms) the length. The flags store both the string's type and its
   * character encoding.
   *
   * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
   * instead of TwoByte. This flag can also be set for ropes, if both the
   * left and right nodes are Latin1. Flattening will result in a Latin1
   * string in this case. When we flatten a TwoByte rope, we turn child ropes
   * (including Latin1 ropes) into TwoByte dependent strings. If one of these
   * strings is also part of another Latin1 rope tree, we can have a Latin1 rope
   * with a TwoByte descendent.
   *
   * The other flags store the string's type. Instead of using a dense index
   * to represent the most-derived type, string types are encoded to allow
   * single-op tests for hot queries (isRope, isDependent, isAtom) which, in
   * view of subtyping, would require slower (isX() || isY() || isZ()).
   *
   * The string type encoding can be summarized as follows. The "instance
   * encoding" entry for a type specifies the flag bits used to create a
   * string instance of that type. Abstract types have no instances and thus
   * have no such entry. The "subtype predicate" entry for a type specifies
   * the predicate used to query whether a JSString instance is subtype
   * (reflexively) of that type.
   *
   *   String         Instance        Subtype
   *   type           encoding        predicate
   *   -----------------------------------------
   *   Rope           0000000 000     xxxxx0x xxx
   *   Linear         0000010 000     xxxxx1x xxx
   *   Dependent      0000110 000     xxxx1xx xxx
   *   AtomRef        1000110 000     1xxxxx0 xxx
   *   External       0100010 000     x100010 xxx
   *   Extensible     0010010 000     x010010 xxx
   *   Inline         0001010 000     xxx1xxx xxx
   *   FatInline      0011010 000     xx11xxx xxx
   *   JSAtom         -               xxxxxx1 xxx
   *   NormalAtom     0000011 000     xxx0xx1 xxx
   *   PermanentAtom  0100011 000     x1xxxx1 xxx
   *   ThinInlineAtom 0001011 000     xx01xx1 xxx
   *   FatInlineAtom  0011011 000     xx11xx1 xxx
   *                                  ||||||| |||
   *                                  ||||||| ||\- [0] reserved (FORWARD_BIT)
   *                                  ||||||| |\-- [1] reserved
   *                                  ||||||| \--- [2] reserved
   *                                  ||||||\----- [3] IsAtom
   *                                  |||||\------ [4] IsLinear
   *                                  ||||\------- [5] IsDependent
   *                                  |||\-------- [6] IsInline
   *                                  ||\--------- [7] FatInlineAtom/Extensible
   *                                  |\---------- [8] External/Permanent
   *                                  \----------- [9] AtomRef (non-atoms) /
   *                                                   AtomIsIndex (atoms)
   *
   * Bits 0..2 are reserved for use by the GC (see
   * gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
   * FORWARD_BIT for forwarded nursery cells. The other 2 bits are currently
   * unused.
   *
   * Note that the first 4 flag bits 3..6 (from right to left in the previous
   * table) have the following meaning and can be used for some hot queries:
   *
   *   Bit 3: IsAtom (Atom, PermanentAtom)
   *   Bit 4: IsLinear
   *   Bit 5: IsDependent
   *   Bit 6: IsInline (Inline, FatInline, ThinInlineAtom, FatInlineAtom)
   *
   * Bit 9 is shared: on a non-atom it is ATOM_REF_BIT, on an atom it is
   * ATOM_IS_INDEX_BIT. This is why the AtomRef predicate also requires the
   * IsAtom bit to be clear (see isAtomRef()).
   *
   * If INDEX_VALUE_BIT is set, bits 16 and up will also hold an integer index.
   */

  // The low bits of flag word are reserved by GC.
  static_assert(js::gc::CellFlagBitsReservedForGC <= 3,
                "JSString::flags must reserve enough bits for Cell");

  static const uint32_t ATOM_BIT = js::Bit(3);
  static const uint32_t LINEAR_BIT = js::Bit(4);
  static const uint32_t DEPENDENT_BIT = js::Bit(5);
  static const uint32_t INLINE_CHARS_BIT = js::Bit(6);
  // Indicates a dependent string pointing to an atom
  static const uint32_t ATOM_REF_BIT = js::Bit(9);

  // Bit 7 is interpreted according to INLINE_CHARS_BIT: "extensible" for
  // non-inline linear strings, "fat" for inline strings.
  static const uint32_t LINEAR_IS_EXTENSIBLE_BIT = js::Bit(7);
  static const uint32_t INLINE_IS_FAT_BIT = js::Bit(7);

  // Bit 8 is interpreted according to ATOM_BIT: "external" for non-atom linear
  // strings, "permanent" for atoms.
  static const uint32_t LINEAR_IS_EXTERNAL_BIT = js::Bit(8);
  static const uint32_t ATOM_IS_PERMANENT_BIT = js::Bit(8);

  static const uint32_t EXTENSIBLE_FLAGS =
      LINEAR_BIT | LINEAR_IS_EXTENSIBLE_BIT;
  static const uint32_t EXTERNAL_FLAGS = LINEAR_BIT | LINEAR_IS_EXTERNAL_BIT;

  static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | INLINE_IS_FAT_BIT;

  /* Initial flags for various types of strings. */
  static const uint32_t INIT_THIN_INLINE_FLAGS = LINEAR_BIT | INLINE_CHARS_BIT;
  static const uint32_t INIT_FAT_INLINE_FLAGS = LINEAR_BIT | FAT_INLINE_MASK;
  static const uint32_t INIT_ROPE_FLAGS = 0;
  static const uint32_t INIT_LINEAR_FLAGS = LINEAR_BIT;
  static const uint32_t INIT_DEPENDENT_FLAGS = LINEAR_BIT | DEPENDENT_BIT;
  static const uint32_t INIT_ATOM_REF_FLAGS =
      INIT_DEPENDENT_FLAGS | ATOM_REF_BIT;

  static const uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
  static_assert((TYPE_FLAGS_MASK & js::gc::HeaderWord::RESERVED_MASK) == 0,
                "GC reserved bits must not be used for Strings");

  // Whether this atom's characters store an uint32 index value less than or
  // equal to MAX_ARRAY_INDEX. This bit means something different if the
  // string is not an atom (see ATOM_REF_BIT)
  // See JSLinearString::isIndex.
  static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(9);

  // Linear strings:
  // - Content and representation are Latin-1 characters.
  // - Unmodifiable after construction.
  //
  // Ropes:
  // - Content are Latin-1 characters.
  // - Flag may be cleared when the rope is changed into a dependent string.
  //
  // Also see LATIN1_CHARS_BIT description under "Flag Encoding".
  static const uint32_t LATIN1_CHARS_BIT = js::Bit(10);

  static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
  static const uint32_t INDEX_VALUE_SHIFT = 16;

  // Whether this is a non-inline linear string with a refcounted
  // mozilla::StringBuffer.
  //
  // If set, d.s.u2.nonInlineChars* still points to the string's characters and
  // the StringBuffer header is stored immediately before the characters. This
  // allows recovering the StringBuffer from the chars pointer with
  // StringBuffer::FromData.
  static const uint32_t HAS_STRING_BUFFER_BIT = js::Bit(12);

  // NON_DEDUP_BIT is used in string deduplication during tenuring. This bit is
  // shared with both FLATTEN_FINISH_NODE and PINNED_ATOM_BIT (all three are
  // bit 15), since it only applies to linear non-atoms.
  static const uint32_t NON_DEDUP_BIT = js::Bit(15);

  // If IN_STRING_TO_ATOM_CACHE is set, this string had an entry in the
  // StringToAtomCache at some point. Note that GC can purge the cache without
  // clearing this bit.
  static const uint32_t IN_STRING_TO_ATOM_CACHE = js::Bit(13);

  // Flags used during rope flattening that indicate what action to perform when
  // returning to the rope's parent rope.
  static const uint32_t FLATTEN_VISIT_RIGHT = js::Bit(14);
  static const uint32_t FLATTEN_FINISH_NODE = js::Bit(15);
  static const uint32_t FLATTEN_MASK =
      FLATTEN_VISIT_RIGHT | FLATTEN_FINISH_NODE;

  // Indicates that this string is depended on by another string. A rope should
  // never be depended on, and this should never be set during flattening, so
  // we can reuse the FLATTEN_VISIT_RIGHT bit.
  static const uint32_t DEPENDED_ON_BIT = FLATTEN_VISIT_RIGHT;

  static const uint32_t PINNED_ATOM_BIT = js::Bit(15);
  static const uint32_t PERMANENT_ATOM_MASK =
      ATOM_BIT | PINNED_ATOM_BIT | ATOM_IS_PERMANENT_BIT;

  static const uint32_t MAX_LENGTH = JS::MaxStringLength;

  static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;

  // Allocate a StringBuffer instead of using raw malloc for strings with
  // length * sizeof(CharT) >= MIN_BYTES_FOR_BUFFER.
  //
  // StringBuffers can be shared more efficiently with DOM code, but have some
  // additional overhead (StringBuffer header, null terminator) so for short
  // strings we prefer malloc.
  //
  // Note that 514 was picked as a pretty conservative initial value. The value
  // is just above 512 to ensure a Latin1 string of length 512 isn't bumped
  // from jemalloc bucket size 512 to size 768. It's an even value because it's
  // divided by 2 for char16_t strings.
  static constexpr size_t MIN_BYTES_FOR_BUFFER = 514;

  /*
   * Helper function to validate that a string of a given length is
   * representable by a JSString. An allocation overflow is reported if false
   * is returned.
   */
  static inline bool validateLength(JSContext* cx, size_t length);

  template <js::AllowGC allowGC>
  static inline bool validateLengthInternal(JSContext* cx, size_t length);

  // Offsets of the flags and length fields, forwarded from the cell header.
  static constexpr size_t offsetOfFlags() { return offsetOfHeaderFlags(); }
  static constexpr size_t offsetOfLength() { return offsetOfHeaderLength(); }

  // True if |other| has exactly the same length and the same full flags word.
  bool sameLengthAndFlags(const JSString& other) const {
    return length() == other.length() && flags() == other.flags();
  }

  // Holds compile-time checks only: the inline storage exactly fills the
  // string, and the field offsets and flag values agree with
  // JS::shadow::String.
  static void staticAsserts() {
    static_assert(JSString::MAX_LENGTH < UINT32_MAX,
                  "Length must fit in 32 bits");
    static_assert(
        sizeof(JSString) == (offsetof(JSString, d.inlineStorageLatin1) +
                             NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
        "Inline Latin1 chars must fit in a JSString");
    static_assert(
        sizeof(JSString) == (offsetof(JSString, d.inlineStorageTwoByte) +
                             NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
        "Inline char16_t chars must fit in a JSString");

    /* Ensure js::shadow::String has the same layout. */
    using JS::shadow::String;
    static_assert(
        JSString::offsetOfRawHeaderFlagsField() == offsetof(String, flags_),
        "shadow::String flags offset must match JSString");
#if JS_BITS_PER_WORD == 32
    static_assert(JSString::offsetOfLength() == offsetof(String, length_),
                  "shadow::String length offset must match JSString");
#endif
    static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) ==
                      offsetof(String, nonInlineCharsLatin1),
                  "shadow::String nonInlineChars offset must match JSString");
    static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
                      offsetof(String, nonInlineCharsTwoByte),
                  "shadow::String nonInlineChars offset must match JSString");
    static_assert(
        offsetof(JSString, d.s.u3.externalCallbacks) ==
            offsetof(String, externalCallbacks),
        "shadow::String externalCallbacks offset must match JSString");
    static_assert(offsetof(JSString, d.inlineStorageLatin1) ==
                      offsetof(String, inlineStorageLatin1),
                  "shadow::String inlineStorage offset must match JSString");
    static_assert(offsetof(JSString, d.inlineStorageTwoByte) ==
                      offsetof(String, inlineStorageTwoByte),
                  "shadow::String inlineStorage offset must match JSString");
    static_assert(ATOM_BIT == String::ATOM_BIT,
                  "shadow::String::ATOM_BIT must match JSString::ATOM_BIT");
    static_assert(LINEAR_BIT == String::LINEAR_BIT,
                  "shadow::String::LINEAR_BIT must match JSString::LINEAR_BIT");
    static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
                  "shadow::String::INLINE_CHARS_BIT must match "
                  "JSString::INLINE_CHARS_BIT");
    static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
                  "shadow::String::LATIN1_CHARS_BIT must match "
                  "JSString::LATIN1_CHARS_BIT");
    static_assert(
        TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
        "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
    static_assert(
        EXTERNAL_FLAGS == String::EXTERNAL_FLAGS,
        "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS");
  }

  /* Avoid silly compile errors in JSRope::flatten */
  friend class JSRope;

  friend class js::gc::RelocationOverlay;

 protected:
  template <typename CharT>
  MOZ_ALWAYS_INLINE void setNonInlineChars(const CharT* chars,
                                           bool usesStringBuffer);

  // Debug-only (MOZ_DEBUG) arena check for malloced chars; compiles to nothing
  // otherwise.
  template <typename CharT>
  static MOZ_ALWAYS_INLINE void checkStringCharsArena(const CharT* chars,
                                                      bool usesStringBuffer) {
#ifdef MOZ_DEBUG
    // Check that the new buffer is located in the StringBufferArena.
    // For now ignore this for StringBuffers because they can be allocated in
    // the main jemalloc arena.
    if (!usesStringBuffer) {
      js::AssertJSStringBufferInCorrectArena(chars);
    }
#endif
  }

  // Get correct non-inline chars enum arm for given type
  template <typename CharT>
  MOZ_ALWAYS_INLINE const CharT* nonInlineCharsRaw() const;

 public:
  MOZ_ALWAYS_INLINE
  bool empty() const { return length() == 0; }

  inline bool getChar(JSContext* cx, size_t index, char16_t* code);
  inline bool getCodePoint(JSContext* cx, size_t index, char32_t* codePoint);

  /* Strings have either Latin1 or TwoByte chars. */
  bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
  bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); }

  /* Strings might contain cached indexes. */
  bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; }
  // The cached index is stored in the flag bits from INDEX_VALUE_SHIFT upwards.
  // Only valid when hasIndexValue() is true (asserted in debug builds).
  uint32_t getIndexValue() const {
    MOZ_ASSERT(hasIndexValue());
    MOZ_ASSERT(isLinear());
    return flags() >> INDEX_VALUE_SHIFT;
  }

  /*
   * Whether any dependent strings point to this string's chars. This is needed
   * so that we don't replace the string with a forwarded atom and free its
   * buffer.
   *
   * NOTE: we specifically do not set this for atoms, because they are accessed
   * on many threads and we don't want to mess with their flags if we don't
   * have to, and it is safe because atoms will never be replaced by an atom
   * ref.
   */
  bool isDependedOn() const {
    bool result = flags() & DEPENDED_ON_BIT;
    MOZ_ASSERT_IF(result, !isRope() && !isAtom());
    return result;
  }

  // Despite the name, this does not assert by itself: it returns whether this
  // string is an atom or is marked as depended on, for use inside an assertion.
  bool assertIsValidBase() const {
    // See isDependedOn comment for why we're excluding atoms
    return isAtom() || isDependedOn();
  }

  // Mark this string as depended on. Does nothing for atoms (see the
  // isDependedOn comment); must not be called on ropes.
  void setDependedOn() {
    MOZ_ASSERT(!isRope());
    if (isAtom()) {
      return;
    }
    setFlagBit(DEPENDED_ON_BIT);
  }

  inline size_t allocSize() const;

  /* Fallible conversions to more-derived string types. */

  inline JSLinearString* ensureLinear(JSContext* cx);

  /* Type query and debug-checked casts */

  MOZ_ALWAYS_INLINE
  bool isRope() const { return !(flags() & LINEAR_BIT); }

  MOZ_ALWAYS_INLINE
  JSRope& asRope() const {
    MOZ_ASSERT(isRope());
    return *(JSRope*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isLinear() const { return flags() & LINEAR_BIT; }

  MOZ_ALWAYS_INLINE
  JSLinearString& asLinear() const {
    MOZ_ASSERT(JSString::isLinear());
    return *(JSLinearString*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isDependent() const { return flags() & DEPENDENT_BIT; }

  // ATOM_REF_BIT shares bit 9 with ATOM_IS_INDEX_BIT, so the bit only means
  // "atom ref" when the string is not itself an atom.
  MOZ_ALWAYS_INLINE
  bool isAtomRef() const {
    return (flags() & ATOM_REF_BIT) && !(flags() & ATOM_BIT);
  }

  MOZ_ALWAYS_INLINE
  JSDependentString& asDependent() const {
    MOZ_ASSERT(isDependent());
    return *(JSDependentString*)this;
  }

  // Exact match of the type flags against EXTENSIBLE_FLAGS.
  MOZ_ALWAYS_INLINE
  bool isExtensible() const {
    return (flags() & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
  }

  MOZ_ALWAYS_INLINE
  JSExtensibleString& asExtensible() const {
    MOZ_ASSERT(isExtensible());
    return *(JSExtensibleString*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isInline() const { return flags() & INLINE_CHARS_BIT; }

  MOZ_ALWAYS_INLINE
  JSInlineString& asInline() const {
    MOZ_ASSERT(isInline());
    return *(JSInlineString*)this;
  }

  // Both INLINE_CHARS_BIT and INLINE_IS_FAT_BIT must be set.
  MOZ_ALWAYS_INLINE
  bool isFatInline() const {
    return (flags() & FAT_INLINE_MASK) == FAT_INLINE_MASK;
  }

  /* For hot code, prefer other type queries. */
  bool isExternal() const {
    return (flags() & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS;
  }

  MOZ_ALWAYS_INLINE
  JSExternalString& asExternal() const {
    MOZ_ASSERT(isExternal());
    return *(JSExternalString*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isAtom() const { return flags() & ATOM_BIT; }

  // All bits of PERMANENT_ATOM_MASK (atom, pinned, permanent) must be set.
  MOZ_ALWAYS_INLINE
  bool isPermanentAtom() const {
    return (flags() & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK;
  }

  MOZ_ALWAYS_INLINE
  JSAtom& asAtom() const {
    MOZ_ASSERT(isAtom());
    return *(JSAtom*)this;
  }

  // Like asAtom(), but the debug assertion reads the flags through
  // headerFlagsFieldAtomic().
  MOZ_ALWAYS_INLINE
  js::JSOffThreadAtom& asOffThreadAtom() const {
    MOZ_ASSERT(headerFlagsFieldAtomic() & ATOM_BIT);
    return *(js::JSOffThreadAtom*)this;
  }

  MOZ_ALWAYS_INLINE
  void setNonDeduplicatable() {
    MOZ_ASSERT(isLinear());
    MOZ_ASSERT(!isAtom());
    setFlagBit(NON_DEDUP_BIT);
  }

  // After copying a string from the nursery to the tenured heap, adjust bits
  // that no longer apply.
  MOZ_ALWAYS_INLINE
  void clearBitsOnTenure() {
    MOZ_ASSERT(!isAtom());
    clearFlagBit(NON_DEDUP_BIT | IN_STRING_TO_ATOM_CACHE);
  }

  // NON_DEDUP_BIT is only valid for linear non-atoms.
  MOZ_ALWAYS_INLINE
  bool isDeduplicatable() const {
    MOZ_ASSERT(isLinear());
    MOZ_ASSERT(!isAtom());
    return !(flags() & NON_DEDUP_BIT);
  }

  void setInStringToAtomCache() {
    MOZ_ASSERT(!isAtom());
    setFlagBit(IN_STRING_TO_ATOM_CACHE);
  }
  bool inStringToAtomCache() const { return flags() & IN_STRING_TO_ATOM_CACHE; }

  // Fills |array| with various strings that represent the different string
  // kinds and character encodings.
  static bool fillWithRepresentatives(JSContext* cx,
                                      JS::Handle<js::ArrayObject*> array);

  /* Only called by the GC for dependent strings. */

  inline bool hasBase() const { return isDependent(); }

  inline JSLinearString* base() const;

  inline JSAtom* atom() const;

  // The base may be forwarded and becomes a relocation overlay.
  // The return value can be a relocation overlay when the base is forwarded,
  // or the return value can be the actual base when it is not forwarded.
  inline JSLinearString* nurseryBaseOrRelocOverlay() const;

  inline bool canOwnDependentChars() const;

  bool tryReplaceWithAtomRef(JSAtom* atom);

  void traceBase(JSTracer* trc);

  /* Only called by the GC for strings with the AllocKind::STRING kind. */

  inline void finalize(JS::GCContext* gcx);

  /* Gets the number of bytes that the chars take on the heap. */

  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);

  // True for linear strings that are not inline, dependent, or external.
  bool hasOutOfLineChars() const {
    return isLinear() && !isInline() && !isDependent() && !isExternal();
  }

  inline bool ownsMallocedChars() const;

  // Whether HAS_STRING_BUFFER_BIT is set. Debug builds assert that the bit is
  // only ever set on linear, non-inline, non-dependent, non-external strings.
  bool hasStringBuffer() const {
    MOZ_ASSERT_IF(flags() & HAS_STRING_BUFFER_BIT,
                  isLinear() && !isInline() && !isDependent() && !isExternal());
    return flags() & HAS_STRING_BUFFER_BIT;
  }

  /* Encode as many scalar values of the string as UTF-8 as can fit
   * into the caller-provided buffer replacing unpaired surrogates
   * with the REPLACEMENT CHARACTER.
   *
   * Returns the number of code units read and the number of code units
   * written.
   *
   * The semantics of this method match the semantics of
   * TextEncoder.encodeInto().
   *
   * This function doesn't modify the representation -- rope, linear,
   * flat, atom, etc. -- of this string. If this string is a rope,
   * it also doesn't modify the representation of left or right halves
   * of this string, or of those halves, and so on.
   *
   * Returns mozilla::Nothing on OOM.
   */
  mozilla::Maybe<std::tuple<size_t, size_t>> encodeUTF8Partial(
      const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const;

 private:
  // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
  // to call the method below.
  friend class js::jit::MacroAssembler;
  static size_t offsetOfNonInlineChars() {
    static_assert(
        offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
            offsetof(JSString, d.s.u2.nonInlineCharsLatin1),
        "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
    return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);
  }

 public:
  static const JS::TraceKind TraceKind = JS::TraceKind::String;

  // The zone this string belongs to: the tenured cell's zone, or the nursery
  // zone for non-tenured strings.
  JS::Zone* zone() const {
    if (isTenured()) {
      // Allow permanent atoms to be accessed across zones and runtimes.
      if (isPermanentAtom()) {
        return zoneFromAnyThread();
      }
      return asTenured().zone();
    }
    return nurseryZone();
  }

  // Thin forwarders to the cell header's length/flags mutators.
  void setLengthAndFlags(uint32_t len, uint32_t flags) {
    setHeaderLengthAndFlags(len, flags);
  }
  void setFlagBit(uint32_t flag) { setHeaderFlagBit(flag); }
  void clearFlagBit(uint32_t flag) { clearHeaderFlagBit(flag); }

  void fixupAfterMovingGC() {}

  // Derive the GC alloc kind from the type flags. Atoms are checked first,
  // then fat inline strings, then external strings; everything else is
  // AllocKind::STRING. For tenured strings, debug builds assert that the
  // result matches the kind recorded by the tenured cell.
  js::gc::AllocKind getAllocKind() const {
    using js::gc::AllocKind;
    AllocKind kind;
    if (isAtom()) {
      if (isFatInline()) {
        kind = AllocKind::FAT_INLINE_ATOM;
      } else {
        kind = AllocKind::ATOM;
      }
    } else if (isFatInline()) {
      kind = AllocKind::FAT_INLINE_STRING;
    } else if (isExternal()) {
      kind = AllocKind::EXTERNAL_STRING;
    } else {
      kind = AllocKind::STRING;
    }
    MOZ_ASSERT_IF(isTenured(), kind == asTenured().getAllocKind());
    return kind;
  }

  // Dumping helpers, only declared in debug or spew-enabled builds.
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
  void dump() const;
  void dump(js::GenericPrinter& out) const;
  void dump(js::JSONPrinter& json) const;

  void dumpCommonFields(js::JSONPrinter& json) const;
  void dumpCharsFields(js::JSONPrinter& json) const;

  void dumpFields(js::JSONPrinter& json) const;
  void dumpStringContent(js::GenericPrinter& out) const;
  void dumpPropertyName(js::GenericPrinter& out) const;

  void dumpChars(js::GenericPrinter& out) const;
  void dumpCharsSingleQuote(js::GenericPrinter& out) const;
  void dumpCharsNoQuote(js::GenericPrinter& out) const;

  template <typename CharT>
  static void dumpCharsNoQuote(const CharT* s, size_t len,
                               js::GenericPrinter& out);

  void dumpRepresentation() const;
  void dumpRepresentation(js::GenericPrinter& out) const;
  void dumpRepresentation(js::JSONPrinter& json) const;
  void dumpRepresentationFields(js::JSONPrinter& json) const;

  bool equals(const char* s);
#endif

  void traceChildren(JSTracer* trc);

  // Override base class implementation to tell GC about permanent atoms.
  bool isPermanentAndMayBeShared() const { return isPermanentAtom(); }

  // Store buffer hooks: the generic Cell** slot is passed on as a JSString**.
  static void addCellAddressToStoreBuffer(js::gc::StoreBuffer* buffer,
                                          js::gc::Cell** cellp) {
    buffer->putCell(reinterpret_cast<JSString**>(cellp));
  }

  static void removeCellAddressFromStoreBuffer(js::gc::StoreBuffer* buffer,
                                               js::gc::Cell** cellp) {
    buffer->unputCell(reinterpret_cast<JSString**>(cellp));
  }

 private:
  // Strings are not copyable.
  JSString(const JSString& other) = delete;
  void operator=(const JSString& other) = delete;

 protected:
  JSString() = default;
};

namespace js {

// Read-only OwnedChars operations made available on a wrapper type. Each
// member forwards to the OwnedChars obtained from Wrapper::get().
template <typename Wrapper, typename CharT>
class WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
  const JSString::OwnedChars<CharT>& get() const {
    return static_cast<const Wrapper*>(this)->get();
  }

 public:
  explicit operator bool() const { return !!get(); }
  mozilla::Span<CharT> span() const { return get().span(); }
  CharT* data() const { return get().data(); }
  size_t length() const { return get().length(); }
  size_t size() const { return get().size(); }
  bool isMalloced() const { return get().isMalloced(); }
  bool hasStringBuffer() const { return get().hasStringBuffer(); }
};

// Mutating OwnedChars operations for mutable wrappers, layered on top of the
// read-only operations above.
template <typename Wrapper, typename CharT>
class MutableWrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper>
    : public WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
  JSString::OwnedChars<CharT>& get() {
    return static_cast<Wrapper*>(this)->get();
  }

 public:
  CharT* release() { return get().release(); }
  void reset() { get().reset(); }
  void ensureNonNursery() { get().ensureNonNursery(); }
};

} /* namespace js */

class JSRope : public JSString {
  friend class js::gc::CellAllocator;

  template <typename CharT>
js::UniquePtr<CharT[], JS::FreePolicy> copyCharsInternal( 982 JSContext* cx, arena_id_t destArenaId) const; 983 984 enum UsingBarrier : bool { NoBarrier = false, WithIncrementalBarrier = true }; 985 986 friend class JSString; 987 JSLinearString* flatten(JSContext* maybecx); 988 989 JSLinearString* flattenInternal(); 990 template <UsingBarrier usingBarrier> 991 JSLinearString* flattenInternal(); 992 993 template <UsingBarrier usingBarrier, typename CharT> 994 static JSLinearString* flattenInternal(JSRope* root); 995 996 template <UsingBarrier usingBarrier> 997 static void ropeBarrierDuringFlattening(JSRope* rope); 998 999 JSRope(JSString* left, JSString* right, size_t length); 1000 1001 public: 1002 template <js::AllowGC allowGC> 1003 static inline JSRope* new_( 1004 JSContext* cx, 1005 typename js::MaybeRooted<JSString*, allowGC>::HandleType left, 1006 typename js::MaybeRooted<JSString*, allowGC>::HandleType right, 1007 size_t length, js::gc::Heap = js::gc::Heap::Default); 1008 1009 js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> copyLatin1Chars( 1010 JSContext* maybecx, arena_id_t destArenaId) const; 1011 JS::UniqueTwoByteChars copyTwoByteChars(JSContext* maybecx, 1012 arena_id_t destArenaId) const; 1013 1014 template <typename CharT> 1015 js::UniquePtr<CharT[], JS::FreePolicy> copyChars( 1016 JSContext* maybecx, arena_id_t destArenaId) const; 1017 1018 // Hash function specific for ropes that avoids allocating a temporary 1019 // string. There are still allocations internally so it's technically 1020 // fallible. 1021 // 1022 // Returns the same value as if this were a linear string being hashed. 1023 [[nodiscard]] bool hash(uint32_t* outhHash) const; 1024 1025 // The process of flattening a rope temporarily overwrites the left pointer of 1026 // interior nodes in the rope DAG with the parent pointer. 
1027 bool isBeingFlattened() const { return flags() & FLATTEN_MASK; } 1028 1029 JSString* leftChild() const { 1030 MOZ_ASSERT(isRope()); 1031 MOZ_ASSERT(!isBeingFlattened()); // Flattening overwrites this field. 1032 return d.s.u2.left; 1033 } 1034 1035 JSString* rightChild() const { 1036 MOZ_ASSERT(isRope()); 1037 return d.s.u3.right; 1038 } 1039 1040 void traceChildren(JSTracer* trc); 1041 1042 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1043 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1044 #endif 1045 1046 private: 1047 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler 1048 // to call the methods below. 1049 friend class js::jit::MacroAssembler; 1050 1051 static size_t offsetOfLeft() { return offsetof(JSRope, d.s.u2.left); } 1052 static size_t offsetOfRight() { return offsetof(JSRope, d.s.u3.right); } 1053 }; 1054 1055 static_assert(sizeof(JSRope) == sizeof(JSString), 1056 "string subclasses must be binary-compatible with JSString"); 1057 1058 /* 1059 * There are optimized entry points for some string allocation functions. 1060 * 1061 * The meaning of suffix: 1062 * * "MaybeDeflate": for char16_t variant, characters can fit Latin1 1063 * * "DontDeflate": for char16_t variant, characters don't fit Latin1 1064 * * "NonStatic": characters don't match StaticStrings 1065 * * "ValidLength": length fits JSString::MAX_LENGTH 1066 */ 1067 1068 class JSLinearString : public JSString { 1069 friend class JSString; 1070 friend class JS::AutoStableStringChars; 1071 friend class js::gc::TenuringTracer; 1072 friend class js::gc::CellAllocator; 1073 friend class JSDependentString; // To allow access when used as base. 1074 1075 /* Vacuous and therefore unimplemented. 
*/ 1076 JSLinearString* ensureLinear(JSContext* cx) = delete; 1077 bool isLinear() const = delete; 1078 JSLinearString& asLinear() const = delete; 1079 1080 JSLinearString(const char16_t* chars, size_t length, bool hasBuffer); 1081 JSLinearString(const JS::Latin1Char* chars, size_t length, bool hasBuffer); 1082 template <typename CharT> 1083 explicit inline JSLinearString(JS::MutableHandle<OwnedChars<CharT>> chars); 1084 1085 protected: 1086 // Used to construct subclasses that do a full initialization themselves. 1087 JSLinearString() = default; 1088 1089 /* Returns void pointer to latin1/twoByte chars, for finalizers. */ 1090 MOZ_ALWAYS_INLINE 1091 void* nonInlineCharsRaw() const { 1092 MOZ_ASSERT(!isInline()); 1093 static_assert( 1094 offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) == 1095 offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1), 1096 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset"); 1097 return (void*)d.s.u2.nonInlineCharsTwoByte; 1098 } 1099 1100 MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const; 1101 MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const; 1102 1103 public: 1104 template <js::AllowGC allowGC, typename CharT> 1105 static inline JSLinearString* new_(JSContext* cx, 1106 JS::MutableHandle<OwnedChars<CharT>> chars, 1107 js::gc::Heap heap); 1108 1109 template <js::AllowGC allowGC, typename CharT> 1110 static inline JSLinearString* newValidLength( 1111 JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars, 1112 js::gc::Heap heap); 1113 1114 // Convert a plain linear string to an extensible string. For testing. The 1115 // caller must ensure that it is a plain or extensible string already, and 1116 // that `capacity` is adequate. 
1117 JSExtensibleString& makeExtensible(size_t capacity); 1118 1119 template <typename CharT> 1120 MOZ_ALWAYS_INLINE const CharT* nonInlineChars( 1121 const JS::AutoRequireNoGC& nogc) const; 1122 1123 MOZ_ALWAYS_INLINE 1124 const JS::Latin1Char* nonInlineLatin1Chars( 1125 const JS::AutoRequireNoGC& nogc) const { 1126 MOZ_ASSERT(!isInline()); 1127 MOZ_ASSERT(hasLatin1Chars()); 1128 return d.s.u2.nonInlineCharsLatin1; 1129 } 1130 1131 MOZ_ALWAYS_INLINE 1132 const char16_t* nonInlineTwoByteChars(const JS::AutoRequireNoGC& nogc) const { 1133 MOZ_ASSERT(!isInline()); 1134 MOZ_ASSERT(hasTwoByteChars()); 1135 return d.s.u2.nonInlineCharsTwoByte; 1136 } 1137 1138 template <typename CharT> 1139 MOZ_ALWAYS_INLINE const CharT* chars(const JS::AutoRequireNoGC& nogc) const; 1140 1141 MOZ_ALWAYS_INLINE 1142 const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const { 1143 return rawLatin1Chars(); 1144 } 1145 1146 MOZ_ALWAYS_INLINE 1147 const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const { 1148 return rawTwoByteChars(); 1149 } 1150 1151 mozilla::Range<const JS::Latin1Char> latin1Range( 1152 const JS::AutoRequireNoGC& nogc) const { 1153 MOZ_ASSERT(JSString::isLinear()); 1154 return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length()); 1155 } 1156 1157 mozilla::Range<const char16_t> twoByteRange( 1158 const JS::AutoRequireNoGC& nogc) const { 1159 MOZ_ASSERT(JSString::isLinear()); 1160 return mozilla::Range<const char16_t>(twoByteChars(nogc), length()); 1161 } 1162 1163 template <typename CharT> 1164 mozilla::Range<const CharT> range(const JS::AutoRequireNoGC& nogc) const { 1165 if constexpr (std::is_same_v<CharT, JS::Latin1Char>) { 1166 return latin1Range(nogc); 1167 } else { 1168 return twoByteRange(nogc); 1169 } 1170 } 1171 1172 MOZ_ALWAYS_INLINE 1173 char16_t latin1OrTwoByteChar(size_t index) const { 1174 MOZ_ASSERT(JSString::isLinear()); 1175 MOZ_ASSERT(index < length()); 1176 JS::AutoCheckCannotGC nogc; 1177 return hasLatin1Chars() ? 
latin1Chars(nogc)[index] 1178 : twoByteChars(nogc)[index]; 1179 } 1180 1181 bool isIndexSlow(uint32_t* indexp) const { 1182 MOZ_ASSERT(JSString::isLinear()); 1183 size_t len = length(); 1184 if (len == 0 || len > js::UINT32_CHAR_BUFFER_LENGTH) { 1185 return false; 1186 } 1187 JS::AutoCheckCannotGC nogc; 1188 if (hasLatin1Chars()) { 1189 const JS::Latin1Char* s = latin1Chars(nogc); 1190 return mozilla::IsAsciiDigit(*s) && 1191 js::CheckStringIsIndex(s, len, indexp); 1192 } 1193 const char16_t* s = twoByteChars(nogc); 1194 return mozilla::IsAsciiDigit(*s) && js::CheckStringIsIndex(s, len, indexp); 1195 } 1196 1197 // Returns true if this string's characters store an unsigned 32-bit integer 1198 // value less than or equal to MAX_ARRAY_INDEX, initializing *indexp to that 1199 // value if so. Leading '0' isn't allowed except 0 itself. 1200 // (Thus if calling isIndex returns true, js::IndexToString(cx, *indexp) will 1201 // be a string equal to this string.) 1202 inline bool isIndex(uint32_t* indexp) const; 1203 1204 // Return whether the characters of this string can be moved by minor or 1205 // compacting GC. 
1206 inline bool hasMovableChars() const; 1207 1208 bool hasCharsInCollectedNurseryRegion() const; 1209 1210 void maybeInitializeIndexValue(uint32_t index, bool allowAtom = false) { 1211 MOZ_ASSERT(JSString::isLinear()); 1212 MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index); 1213 MOZ_ASSERT_IF(!allowAtom, !isAtom()); 1214 1215 if (hasIndexValue() || index > UINT16_MAX) { 1216 return; 1217 } 1218 1219 mozilla::DebugOnly<uint32_t> containedIndex; 1220 MOZ_ASSERT(isIndexSlow(&containedIndex)); 1221 MOZ_ASSERT(index == containedIndex); 1222 1223 setFlagBit((index << INDEX_VALUE_SHIFT) | INDEX_VALUE_BIT); 1224 MOZ_ASSERT(getIndexValue() == index); 1225 } 1226 1227 mozilla::StringBuffer* stringBuffer() const { 1228 MOZ_ASSERT(hasStringBuffer()); 1229 auto* chars = nonInlineCharsRaw(); 1230 return mozilla::StringBuffer::FromData(const_cast<void*>(chars)); 1231 } 1232 1233 /* 1234 * Returns a property name represented by this string, or null on failure. 1235 * You must verify that this is not an index per isIndex before calling 1236 * this method. 1237 */ 1238 inline js::PropertyName* toPropertyName(JSContext* cx); 1239 1240 // Make sure chars are not in the nursery, mallocing and copying if necessary. 1241 // Should only be called during minor GC on a string that has been promoted 1242 // to the tenured heap and may still point to nursery-allocated chars. 1243 template <typename CharT> 1244 inline size_t maybeMallocCharsOnPromotion(js::Nursery* nursery); 1245 1246 // Handle an edge case where a dependent chain N1 -> T2 -> N3 cannot handle N3 1247 // moving its chars (or more specifically, updating N1 to the new chars.) When 1248 // this is detected, convert N1 to a regular string with its own storage. 1249 // 1250 // Returns whether the chars were cloned. 
1251 template <typename CharT> 1252 static void maybeCloneCharsOnPromotionTyped(JSLinearString* str); 1253 1254 static void maybeCloneCharsOnPromotion(JSLinearString* str) { 1255 if (str->hasLatin1Chars()) { 1256 maybeCloneCharsOnPromotionTyped<JS::Latin1Char>(str); 1257 } else { 1258 maybeCloneCharsOnPromotionTyped<char16_t>(str); 1259 } 1260 } 1261 1262 inline void finalize(JS::GCContext* gcx); 1263 inline size_t allocSize() const; 1264 1265 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1266 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1267 #endif 1268 1269 // Make a partially-initialized string safe for finalization. 1270 inline void disownCharsBecauseError(); 1271 }; 1272 1273 static_assert(sizeof(JSLinearString) == sizeof(JSString), 1274 "string subclasses must be binary-compatible with JSString"); 1275 1276 namespace JS { 1277 enum class ContractBaseChain : bool { AllowLong = false, Contract = true }; 1278 } 1279 1280 class JSDependentString : public JSLinearString { 1281 friend class JSString; 1282 friend class js::gc::CellAllocator; 1283 1284 JSDependentString(JSLinearString* base, size_t start, size_t length); 1285 1286 // For JIT string allocation. 1287 JSDependentString() = default; 1288 1289 /* Vacuous and therefore unimplemented. */ 1290 bool isDependent() const = delete; 1291 JSDependentString& asDependent() const = delete; 1292 1293 /* The offset of this string's chars in base->chars(). 
*/ 1294 MOZ_ALWAYS_INLINE size_t baseOffset() const { 1295 MOZ_ASSERT(JSString::isDependent()); 1296 JS::AutoCheckCannotGC nogc; 1297 size_t offset; 1298 if (hasTwoByteChars()) { 1299 offset = twoByteChars(nogc) - base()->twoByteChars(nogc); 1300 } else { 1301 offset = latin1Chars(nogc) - base()->latin1Chars(nogc); 1302 } 1303 MOZ_ASSERT(offset < base()->length()); 1304 return offset; 1305 } 1306 1307 public: 1308 template <JS::ContractBaseChain contract> 1309 static inline JSLinearString* newImpl_(JSContext* cx, JSLinearString* base, 1310 size_t start, size_t length, 1311 js::gc::Heap heap); 1312 1313 // This will always return a dependent string, and will assert if the chars 1314 // could fit into an inline string. 1315 static inline JSLinearString* new_(JSContext* cx, JSLinearString* base, 1316 size_t start, size_t length, 1317 js::gc::Heap heap); 1318 1319 // Only called by the GC during nursery collection. 1320 void setBase(JSLinearString* newBase); 1321 1322 template <typename T> 1323 void relocateBaseAndChars(JSLinearString* base, T chars, size_t offset) { 1324 MOZ_ASSERT(base->assertIsValidBase()); 1325 bool usesStringBuffer = base->hasStringBuffer(); 1326 setNonInlineChars(chars + offset, usesStringBuffer); 1327 setBase(base); 1328 } 1329 1330 JSLinearString* rootBaseDuringMinorGC(); 1331 1332 template <typename CharT> 1333 inline void updateToPromotedBaseImpl(JSLinearString* base); 1334 1335 inline void updateToPromotedBase(JSLinearString* base); 1336 1337 // Avoid creating a dependent string if no more than 6.25% (1/16) of the base 1338 // string are used, to prevent tiny dependent strings keeping large base 1339 // strings alive. (The percentage was chosen as a somewhat arbitrary threshold 1340 // that is easy to compute.) 1341 // 1342 // Note that currently this limit only applies during tenuring; in the 1343 // nursery, small dependent strings will be created but then cloned into 1344 // unshared strings during tenuring. 
(The base string will not be marked in 1345 // this case.) 1346 static bool smallComparedToBase(size_t sharedChars, size_t baseChars) { 1347 return sharedChars <= (baseChars >> 4); 1348 } 1349 1350 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1351 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1352 #endif 1353 1354 private: 1355 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler 1356 // to call the method below. 1357 friend class js::jit::MacroAssembler; 1358 1359 inline static size_t offsetOfBase() { 1360 return offsetof(JSDependentString, d.s.u3.base); 1361 } 1362 }; 1363 1364 static_assert(sizeof(JSDependentString) == sizeof(JSString), 1365 "string subclasses must be binary-compatible with JSString"); 1366 1367 class JSAtomRefString : public JSDependentString { 1368 friend class JSString; 1369 friend class js::gc::CellAllocator; 1370 friend class js::jit::MacroAssembler; 1371 1372 public: 1373 inline static size_t offsetOfAtom() { 1374 return offsetof(JSAtomRefString, d.s.u3.atom); 1375 } 1376 }; 1377 1378 static_assert(sizeof(JSAtomRefString) == sizeof(JSString), 1379 "string subclasses must be binary-compatible with JSString"); 1380 1381 class JSExtensibleString : public JSLinearString { 1382 /* Vacuous and therefore unimplemented. 
*/ 1383 bool isExtensible() const = delete; 1384 JSExtensibleString& asExtensible() const = delete; 1385 1386 public: 1387 MOZ_ALWAYS_INLINE 1388 size_t capacity() const { 1389 MOZ_ASSERT(JSString::isExtensible()); 1390 return d.s.u3.capacity; 1391 } 1392 1393 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1394 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1395 #endif 1396 }; 1397 1398 static_assert(sizeof(JSExtensibleString) == sizeof(JSString), 1399 "string subclasses must be binary-compatible with JSString"); 1400 1401 class JSInlineString : public JSLinearString { 1402 public: 1403 MOZ_ALWAYS_INLINE 1404 const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const { 1405 MOZ_ASSERT(JSString::isInline()); 1406 MOZ_ASSERT(hasLatin1Chars()); 1407 return d.inlineStorageLatin1; 1408 } 1409 1410 MOZ_ALWAYS_INLINE 1411 const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const { 1412 MOZ_ASSERT(JSString::isInline()); 1413 MOZ_ASSERT(hasTwoByteChars()); 1414 return d.inlineStorageTwoByte; 1415 } 1416 1417 template <typename CharT> 1418 static bool lengthFits(size_t length); 1419 1420 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1421 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1422 #endif 1423 1424 private: 1425 // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler 1426 // to call the method below. 1427 friend class js::jit::MacroAssembler; 1428 static size_t offsetOfInlineStorage() { 1429 return offsetof(JSInlineString, d.inlineStorageTwoByte); 1430 } 1431 }; 1432 1433 static_assert(sizeof(JSInlineString) == sizeof(JSString), 1434 "string subclasses must be binary-compatible with JSString"); 1435 1436 /* 1437 * On 32-bit platforms, JSThinInlineString can store 8 Latin1 characters or 4 1438 * TwoByte characters inline. On 64-bit platforms, these numbers are 16 and 8, 1439 * respectively. 
1440 */ 1441 class JSThinInlineString : public JSInlineString { 1442 friend class js::gc::CellAllocator; 1443 1444 // The constructors return a mutable pointer to the data, because the first 1445 // thing any creator will do is copy in the string value. This also 1446 // conveniently allows doing overload resolution on CharT. 1447 explicit JSThinInlineString(size_t length, JS::Latin1Char** chars); 1448 explicit JSThinInlineString(size_t length, char16_t** chars); 1449 1450 // For JIT string allocation. 1451 JSThinInlineString() = default; 1452 1453 public: 1454 static constexpr size_t InlineBytes = NUM_INLINE_CHARS_LATIN1; 1455 1456 static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1; 1457 static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE; 1458 1459 template <js::AllowGC allowGC> 1460 static inline JSThinInlineString* new_(JSContext* cx, js::gc::Heap heap); 1461 1462 template <typename CharT> 1463 static bool lengthFits(size_t length); 1464 }; 1465 1466 static_assert(sizeof(JSThinInlineString) == sizeof(JSString), 1467 "string subclasses must be binary-compatible with JSString"); 1468 1469 /* 1470 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 12 and 1471 * MAX_LENGTH_LATIN1 is 24. This is deliberate, in order to minimize potential 1472 * performance differences between 32-bit and 64-bit platforms. 1473 * 1474 * There are still some differences due to NUM_INLINE_CHARS_* being different. 1475 * E.g. TwoByte strings of length 5--8 will be JSFatInlineStrings on 32-bit 1476 * platforms and JSThinInlineStrings on 64-bit platforms. But the more 1477 * significant transition from inline strings to non-inline strings occurs at 1478 * length 12 (for TwoByte strings) and 24 (Latin1 strings) on both 32-bit and 1479 * 64-bit platforms. 
1480 */ 1481 class JSFatInlineString : public JSInlineString { 1482 friend class js::gc::CellAllocator; 1483 1484 static const size_t INLINE_EXTENSION_CHARS_LATIN1 = 1485 24 - NUM_INLINE_CHARS_LATIN1; 1486 static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE = 1487 12 - NUM_INLINE_CHARS_TWO_BYTE; 1488 1489 // The constructors return a mutable pointer to the data, because the first 1490 // thing any creator will do is copy in the string value. This also 1491 // conveniently allows doing overload resolution on CharT. 1492 explicit JSFatInlineString(size_t length, JS::Latin1Char** chars); 1493 explicit JSFatInlineString(size_t length, char16_t** chars); 1494 1495 // For JIT string allocation. 1496 JSFatInlineString() = default; 1497 1498 protected: /* to fool clang into not warning this is unused */ 1499 union { 1500 char inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1]; 1501 char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE]; 1502 }; 1503 1504 public: 1505 template <js::AllowGC allowGC> 1506 static inline JSFatInlineString* new_(JSContext* cx, js::gc::Heap heap); 1507 1508 static const size_t MAX_LENGTH_LATIN1 = 1509 JSString::NUM_INLINE_CHARS_LATIN1 + INLINE_EXTENSION_CHARS_LATIN1; 1510 1511 static const size_t MAX_LENGTH_TWO_BYTE = 1512 JSString::NUM_INLINE_CHARS_TWO_BYTE + INLINE_EXTENSION_CHARS_TWO_BYTE; 1513 1514 template <typename CharT> 1515 static bool lengthFits(size_t length); 1516 1517 // Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING 1518 // kind. 
1519 MOZ_ALWAYS_INLINE void finalize(JS::GCContext* gcx); 1520 }; 1521 1522 static_assert(sizeof(JSFatInlineString) % js::gc::CellAlignBytes == 0, 1523 "fat inline strings shouldn't waste space up to the next cell " 1524 "boundary"); 1525 1526 class JSExternalString : public JSLinearString { 1527 friend class js::gc::CellAllocator; 1528 1529 JSExternalString(const JS::Latin1Char* chars, size_t length, 1530 const JSExternalStringCallbacks* callbacks); 1531 JSExternalString(const char16_t* chars, size_t length, 1532 const JSExternalStringCallbacks* callbacks); 1533 1534 /* Vacuous and therefore unimplemented. */ 1535 bool isExternal() const = delete; 1536 JSExternalString& asExternal() const = delete; 1537 1538 template <typename CharT> 1539 static inline JSExternalString* newImpl( 1540 JSContext* cx, const CharT* chars, size_t length, 1541 const JSExternalStringCallbacks* callbacks); 1542 1543 public: 1544 static inline JSExternalString* new_( 1545 JSContext* cx, const JS::Latin1Char* chars, size_t length, 1546 const JSExternalStringCallbacks* callbacks); 1547 static inline JSExternalString* new_( 1548 JSContext* cx, const char16_t* chars, size_t length, 1549 const JSExternalStringCallbacks* callbacks); 1550 1551 const JSExternalStringCallbacks* callbacks() const { 1552 MOZ_ASSERT(JSString::isExternal()); 1553 return d.s.u3.externalCallbacks; 1554 } 1555 1556 // External chars are never allocated inline or in the nursery, so we can 1557 // safely expose this without requiring an AutoCheckCannotGC argument. 1558 const JS::Latin1Char* latin1Chars() const { return rawLatin1Chars(); } 1559 const char16_t* twoByteChars() const { return rawTwoByteChars(); } 1560 1561 // Only called by the GC for strings with the AllocKind::EXTERNAL_STRING 1562 // kind. 
1563 inline void finalize(JS::GCContext* gcx); 1564 1565 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1566 void dumpOwnRepresentationFields(js::JSONPrinter& json) const; 1567 #endif 1568 }; 1569 1570 static_assert(sizeof(JSExternalString) == sizeof(JSString), 1571 "string subclasses must be binary-compatible with JSString"); 1572 1573 class JSAtom : public JSLinearString { 1574 /* Vacuous and therefore unimplemented. */ 1575 bool isAtom() const = delete; 1576 JSAtom& asAtom() const = delete; 1577 1578 public: 1579 template <typename CharT> 1580 static inline JSAtom* newValidLength(JSContext* cx, OwnedChars<CharT>& chars, 1581 js::HashNumber hash); 1582 1583 /* Returns the PropertyName for this. isIndex() must be false. */ 1584 inline js::PropertyName* asPropertyName(); 1585 1586 MOZ_ALWAYS_INLINE 1587 bool isPermanent() const { return JSString::isPermanentAtom(); } 1588 1589 MOZ_ALWAYS_INLINE 1590 void makePermanent() { 1591 MOZ_ASSERT(JSString::isAtom()); 1592 setFlagBit(PERMANENT_ATOM_MASK); 1593 } 1594 1595 MOZ_ALWAYS_INLINE bool isIndex() const { 1596 MOZ_ASSERT(JSString::isAtom()); 1597 mozilla::DebugOnly<uint32_t> index; 1598 MOZ_ASSERT(!!(flags() & ATOM_IS_INDEX_BIT) == isIndexSlow(&index)); 1599 return flags() & ATOM_IS_INDEX_BIT; 1600 } 1601 MOZ_ALWAYS_INLINE bool isIndex(uint32_t* index) const { 1602 MOZ_ASSERT(JSString::isAtom()); 1603 if (!isIndex()) { 1604 return false; 1605 } 1606 *index = hasIndexValue() ? 
getIndexValue() : getIndexSlow(); 1607 return true; 1608 } 1609 1610 uint32_t getIndexSlow() const; 1611 1612 void setIsIndex(uint32_t index) { 1613 MOZ_ASSERT(JSString::isAtom()); 1614 setFlagBit(ATOM_IS_INDEX_BIT); 1615 maybeInitializeIndexValue(index, /* allowAtom = */ true); 1616 } 1617 1618 MOZ_ALWAYS_INLINE bool isPinned() const { return flags() & PINNED_ATOM_BIT; } 1619 1620 void setPinned() { 1621 MOZ_ASSERT(!isPinned()); 1622 setFlagBit(PINNED_ATOM_BIT); 1623 } 1624 1625 inline js::HashNumber hash() const; 1626 1627 template <typename CharT> 1628 static bool lengthFitsInline(size_t length); 1629 1630 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) 1631 void dump(js::GenericPrinter& out); 1632 void dump(); 1633 #endif 1634 }; 1635 1636 namespace js { 1637 1638 class NormalAtom : public JSAtom { 1639 friend class gc::CellAllocator; 1640 1641 protected: 1642 static constexpr size_t ExtensionBytes = 1643 js::gc::CellAlignBytes - sizeof(js::HashNumber); 1644 1645 char inlineStorage_[ExtensionBytes]; 1646 HashNumber hash_; 1647 1648 // For subclasses to call. 1649 explicit NormalAtom(js::HashNumber hash) : hash_(hash) {} 1650 1651 // Out of line atoms, mimicking JSLinearString constructor. 
1652 template <typename CharT> 1653 NormalAtom(const OwnedChars<CharT>& chars, js::HashNumber hash); 1654 1655 public: 1656 HashNumber hash() const { return hash_; } 1657 1658 static constexpr size_t offsetOfHash() { return offsetof(NormalAtom, hash_); } 1659 }; 1660 1661 static_assert(sizeof(NormalAtom) == 1662 js::RoundUp(sizeof(JSString) + sizeof(js::HashNumber), 1663 js::gc::CellAlignBytes), 1664 "NormalAtom must have size of a string + HashNumber, " 1665 "aligned to gc::CellAlignBytes"); 1666 1667 class ThinInlineAtom : public NormalAtom { 1668 friend class gc::CellAllocator; 1669 1670 public: 1671 static constexpr size_t MAX_LENGTH_LATIN1 = 1672 NUM_INLINE_CHARS_LATIN1 + ExtensionBytes / sizeof(JS::Latin1Char); 1673 static constexpr size_t MAX_LENGTH_TWO_BYTE = 1674 NUM_INLINE_CHARS_TWO_BYTE + ExtensionBytes / sizeof(char16_t); 1675 1676 #ifdef JS_64BIT 1677 // Fat and Thin inline atoms are the same size. Only use fat. 1678 static constexpr bool EverInstantiated = false; 1679 #else 1680 static constexpr bool EverInstantiated = true; 1681 #endif 1682 1683 protected: 1684 // Mimicking JSThinInlineString constructors. 1685 #ifdef JS_64BIT 1686 ThinInlineAtom(size_t length, JS::Latin1Char** chars, 1687 js::HashNumber hash) = delete; 1688 ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash) = delete; 1689 #else 1690 ThinInlineAtom(size_t length, JS::Latin1Char** chars, js::HashNumber hash); 1691 ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash); 1692 #endif 1693 1694 public: 1695 template <typename CharT> 1696 static bool lengthFits(size_t length) { 1697 if constexpr (sizeof(CharT) == sizeof(JS::Latin1Char)) { 1698 return length <= MAX_LENGTH_LATIN1; 1699 } else { 1700 return length <= MAX_LENGTH_TWO_BYTE; 1701 } 1702 } 1703 }; 1704 1705 // FatInlineAtom is basically a JSFatInlineString, except it has a hash value in 1706 // the last word that reduces the inline char storage. 
1707 class FatInlineAtom : public JSAtom { 1708 friend class gc::CellAllocator; 1709 1710 // The space available for storing inline characters. It's the same amount of 1711 // space as a JSFatInlineString, except we take the hash value out of it. 1712 static constexpr size_t InlineBytes = sizeof(JSFatInlineString) - 1713 sizeof(JSString::Base) - 1714 sizeof(js::HashNumber); 1715 1716 static constexpr size_t ExtensionBytes = 1717 InlineBytes - JSThinInlineString::InlineBytes; 1718 1719 public: 1720 static constexpr size_t MAX_LENGTH_LATIN1 = 1721 InlineBytes / sizeof(JS::Latin1Char); 1722 static constexpr size_t MAX_LENGTH_TWO_BYTE = InlineBytes / sizeof(char16_t); 1723 1724 protected: // Silence Clang unused-field warning. 1725 char inlineStorage_[ExtensionBytes]; 1726 HashNumber hash_; 1727 1728 // Mimicking JSFatInlineString constructors. 1729 explicit FatInlineAtom(size_t length, JS::Latin1Char** chars, 1730 js::HashNumber hash); 1731 explicit FatInlineAtom(size_t length, char16_t** chars, js::HashNumber hash); 1732 1733 public: 1734 HashNumber hash() const { return hash_; } 1735 1736 inline void finalize(JS::GCContext* gcx); 1737 1738 static constexpr size_t offsetOfHash() { 1739 static_assert( 1740 sizeof(FatInlineAtom) == 1741 js::RoundUp(sizeof(JSThinInlineString) + 1742 FatInlineAtom::ExtensionBytes + sizeof(HashNumber), 1743 gc::CellAlignBytes), 1744 "FatInlineAtom must have size of a thin inline string + " 1745 "extension bytes if any + HashNumber, " 1746 "aligned to gc::CellAlignBytes"); 1747 1748 return offsetof(FatInlineAtom, hash_); 1749 } 1750 1751 template <typename CharT> 1752 static bool lengthFits(size_t length) { 1753 return length * sizeof(CharT) <= InlineBytes; 1754 } 1755 }; 1756 1757 static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString), 1758 "FatInlineAtom must be the same size as a fat inline string"); 1759 1760 // When an algorithm does not need a string represented as a single linear 1761 // array of characters, this range 
utility may be used to traverse the string a 1762 // sequence of linear arrays of characters. This avoids flattening ropes. 1763 template <size_t Size = 16> 1764 class StringSegmentRange { 1765 // If malloc() shows up in any profiles from this vector, we can add a new 1766 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx. 1767 using StackVector = JS::GCVector<JSString*, Size>; 1768 Rooted<StackVector> stack; 1769 Rooted<JSLinearString*> cur; 1770 1771 bool settle(JSString* str) { 1772 while (str->isRope()) { 1773 JSRope& rope = str->asRope(); 1774 if (!stack.append(rope.rightChild())) { 1775 return false; 1776 } 1777 str = rope.leftChild(); 1778 } 1779 cur = &str->asLinear(); 1780 return true; 1781 } 1782 1783 public: 1784 explicit StringSegmentRange(JSContext* cx) 1785 : stack(cx, StackVector(cx)), cur(cx) {} 1786 1787 [[nodiscard]] bool init(JSString* str) { 1788 MOZ_ASSERT(stack.empty()); 1789 return settle(str); 1790 } 1791 1792 bool empty() const { return cur == nullptr; } 1793 1794 JSLinearString* front() const { 1795 MOZ_ASSERT(!cur->isRope()); 1796 return cur; 1797 } 1798 1799 [[nodiscard]] bool popFront() { 1800 MOZ_ASSERT(!empty()); 1801 if (stack.empty()) { 1802 cur = nullptr; 1803 return true; 1804 } 1805 return settle(stack.popCopy()); 1806 } 1807 }; 1808 1809 // This class should be used in code that manipulates strings off-thread (for 1810 // example, Ion compilation). The key difference is that flags are loaded 1811 // atomically, preventing data races if flags (especially the pinned atom bit) 1812 // are mutated on the main thread. We use private inheritance to avoid 1813 // accidentally exposing anything non-thread-safe. 
1814 class JSOffThreadAtom : private JSAtom { 1815 public: 1816 size_t length() const { return headerLengthFieldAtomic(); } 1817 size_t flags() const { return headerFlagsFieldAtomic(); } 1818 1819 bool empty() const { return length() == 0; } 1820 1821 bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; } 1822 bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); } 1823 1824 bool isAtom() const { return flags() & ATOM_BIT; } 1825 bool isInline() const { return flags() & INLINE_CHARS_BIT; } 1826 bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; } 1827 bool isIndex() const { return flags() & ATOM_IS_INDEX_BIT; } 1828 bool isFatInline() const { 1829 return (flags() & FAT_INLINE_MASK) == FAT_INLINE_MASK; 1830 } 1831 1832 uint32_t getIndexValue() const { 1833 MOZ_ASSERT(hasIndexValue()); 1834 return flags() >> INDEX_VALUE_SHIFT; 1835 } 1836 bool isIndex(uint32_t* index) const { 1837 if (!isIndex()) { 1838 return false; 1839 } 1840 *index = hasIndexValue() ? getIndexValue() : getIndexSlow(); 1841 return true; 1842 } 1843 uint32_t getIndexSlow() const; 1844 1845 const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const { 1846 MOZ_ASSERT(hasLatin1Chars()); 1847 return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1; 1848 }; 1849 const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const { 1850 MOZ_ASSERT(hasTwoByteChars()); 1851 return JSLinearString::twoByteChars(nogc); 1852 return isInline() ? 
d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte; 1853 } 1854 mozilla::Range<const JS::Latin1Char> latin1Range( 1855 const JS::AutoRequireNoGC& nogc) const { 1856 return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length()); 1857 } 1858 mozilla::Range<const char16_t> twoByteRange( 1859 const JS::AutoRequireNoGC& nogc) const { 1860 return mozilla::Range<const char16_t>(twoByteChars(nogc), length()); 1861 } 1862 char16_t latin1OrTwoByteChar(size_t index) const { 1863 MOZ_ASSERT(index < length()); 1864 JS::AutoCheckCannotGC nogc; 1865 return hasLatin1Chars() ? latin1Chars(nogc)[index] 1866 : twoByteChars(nogc)[index]; 1867 } 1868 1869 inline HashNumber hash() const { 1870 if (isFatInline()) { 1871 return reinterpret_cast<const js::FatInlineAtom*>(this)->hash(); 1872 } 1873 return reinterpret_cast<const js::NormalAtom*>(this)->hash(); 1874 } 1875 1876 JSAtom* unwrap() { return this; } 1877 const JSAtom* unwrap() const { return this; } 1878 1879 // Should only be used to get an opaque pointer for baking into jitcode. 1880 const js::gc::Cell* raw() const { return this; } 1881 }; 1882 1883 } // namespace js 1884 1885 inline js::HashNumber JSAtom::hash() const { 1886 if (isFatInline()) { 1887 return static_cast<const js::FatInlineAtom*>(this)->hash(); 1888 } 1889 return static_cast<const js::NormalAtom*>(this)->hash(); 1890 } 1891 1892 namespace js { 1893 1894 /* 1895 * Represents an atomized string which does not contain an index (that is, an 1896 * unsigned 32-bit value). Thus for any PropertyName propname, 1897 * ToString(ToUint32(propname)) never equals propname. 1898 * 1899 * To more concretely illustrate the utility of PropertyName, consider that it 1900 * is used to partition, in a type-safe manner, the ways to refer to a 1901 * property, as follows: 1902 * 1903 * - uint32_t indexes, 1904 * - PropertyName strings which don't encode uint32_t indexes, 1905 * - Symbol, and 1906 * - JS::PropertyKey::isVoid. 
1907 */ 1908 class PropertyName : public JSAtom { 1909 private: 1910 /* Vacuous and therefore unimplemented. */ 1911 PropertyName* asPropertyName() = delete; 1912 }; 1913 1914 static_assert(sizeof(PropertyName) == sizeof(JSString), 1915 "string subclasses must be binary-compatible with JSString"); 1916 1917 static MOZ_ALWAYS_INLINE jsid NameToId(PropertyName* name) { 1918 return JS::PropertyKey::NonIntAtom(name); 1919 } 1920 1921 using PropertyNameVector = JS::GCVector<PropertyName*>; 1922 1923 template <typename CharT> 1924 void CopyChars(CharT* dest, const JSLinearString& str); 1925 1926 static inline UniqueChars StringToNewUTF8CharsZ(JSContext* cx, JSString& str) { 1927 JS::AutoCheckCannotGC nogc; 1928 1929 JSLinearString* linear = str.ensureLinear(cx); 1930 if (!linear) { 1931 return nullptr; 1932 } 1933 1934 return UniqueChars( 1935 linear->hasLatin1Chars() 1936 ? JS::CharsToNewUTF8CharsZ(cx, linear->latin1Range(nogc)).c_str() 1937 : JS::CharsToNewUTF8CharsZ(cx, linear->twoByteRange(nogc)).c_str()); 1938 } 1939 1940 template <typename CharT> 1941 extern JSString::OwnedChars<CharT> AllocAtomCharsValidLength(JSContext* cx, 1942 size_t length); 1943 1944 /** 1945 * Allocate a string with the given contents. If |allowGC == CanGC|, this may 1946 * trigger a GC. 1947 */ 1948 template <js::AllowGC allowGC, typename CharT> 1949 extern JSLinearString* NewString(JSContext* cx, 1950 UniquePtr<CharT[], JS::FreePolicy> chars, 1951 size_t length, 1952 js::gc::Heap heap = js::gc::Heap::Default); 1953 1954 /* Like NewString, but doesn't try to deflate to Latin1. */ 1955 template <js::AllowGC allowGC, typename CharT> 1956 extern JSLinearString* NewStringDontDeflate( 1957 JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length, 1958 js::gc::Heap heap = js::gc::Heap::Default); 1959 1960 /* This may return a static string/atom or an inline string. 
 */
extern JSLinearString* NewDependentString(
    JSContext* cx, JSString* base, size_t start, size_t length,
    js::gc::Heap heap = js::gc::Heap::Default);

/* As above, but give an option to not contract the chain of base strings, in
   order to create messier situations for testing (some of which may not be
   possible in practice). Note that |heap| has no default here. */
extern JSLinearString* NewDependentStringForTesting(
    JSContext* cx, JSString* base, size_t start, size_t length,
    JS::ContractBaseChain contract, js::gc::Heap heap);

/* Take ownership of an array of Latin1Chars. */
extern JSLinearString* NewLatin1StringZ(
    JSContext* cx, UniqueChars chars,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Copy a counted string and GC-allocate a descriptor for it. */
template <js::AllowGC allowGC, typename CharT>
extern JSLinearString* NewStringCopyN(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Overload for plain |char| input: the bytes are reinterpreted as Latin1Char
   and forwarded to the generic NewStringCopyN above. */
template <js::AllowGC allowGC>
inline JSLinearString* NewStringCopyN(
    JSContext* cx, const char* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n,
                                 heap);
}

template <typename CharT>
extern JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
                                                   const CharT* s, size_t n,
                                                   js::HashNumber hash);

template <typename CharT>
extern JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const CharT* s,
                                                  size_t n,
                                                  js::HashNumber hash);

/* Copy a counted string and GC-allocate a descriptor for it.
 * This overload takes the characters as a mozilla::Span and forwards its
 * data()/size() to NewStringCopyN.
 */
template <js::AllowGC allowGC, typename CharT>
inline JSLinearString* NewStringCopy(
    JSContext* cx, mozilla::Span<const CharT> s,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
}

/* Copy a counted string and GC-allocate a descriptor for it. This overload
   takes a std::basic_string_view and is disabled (via enable_if) when CharT is
   |unsigned char|. */
template <
    js::AllowGC allowGC, typename CharT,
    typename std::enable_if_t<!std::is_same_v<CharT, unsigned char>>* = nullptr>
inline JSLinearString* NewStringCopy(
    JSContext* cx, std::basic_string_view<CharT> s,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
}

/* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
template <js::AllowGC allowGC, typename CharT>
extern JSLinearString* NewStringCopyNDontDeflate(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

template <js::AllowGC allowGC, typename CharT>
extern JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Copy a C string and GC-allocate a descriptor for it.
*/ 2031 template <js::AllowGC allowGC> 2032 inline JSLinearString* NewStringCopyZ( 2033 JSContext* cx, const char16_t* s, 2034 js::gc::Heap heap = js::gc::Heap::Default) { 2035 return NewStringCopyN<allowGC>(cx, s, js_strlen(s), heap); 2036 } 2037 2038 template <js::AllowGC allowGC> 2039 inline JSLinearString* NewStringCopyZ( 2040 JSContext* cx, const char* s, js::gc::Heap heap = js::gc::Heap::Default) { 2041 return NewStringCopyN<allowGC>(cx, s, strlen(s), heap); 2042 } 2043 2044 extern JSLinearString* NewStringCopyUTF8N( 2045 JSContext* cx, const JS::UTF8Chars& utf8, JS::SmallestEncoding encoding, 2046 js::gc::Heap heap = js::gc::Heap::Default); 2047 2048 extern JSLinearString* NewStringCopyUTF8N( 2049 JSContext* cx, const JS::UTF8Chars& utf8, 2050 js::gc::Heap heap = js::gc::Heap::Default); 2051 2052 inline JSLinearString* NewStringCopyUTF8Z( 2053 JSContext* cx, const JS::ConstUTF8CharsZ utf8, 2054 js::gc::Heap heap = js::gc::Heap::Default) { 2055 return NewStringCopyUTF8N( 2056 cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())), heap); 2057 } 2058 2059 template <typename CharT> 2060 JSString* NewMaybeExternalString(JSContext* cx, const CharT* s, size_t n, 2061 const JSExternalStringCallbacks* callbacks, 2062 bool* allocatedExternal, 2063 js::gc::Heap heap = js::gc::Heap::Default); 2064 2065 static_assert(sizeof(HashNumber) == 4); 2066 2067 template <AllowGC allowGC> 2068 extern JSString* ConcatStrings( 2069 JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left, 2070 typename MaybeRooted<JSString*, allowGC>::HandleType right, 2071 js::gc::Heap heap = js::gc::Heap::Default); 2072 2073 /* 2074 * Test if strings are equal. The caller can call the function even if str1 2075 * or str2 are not GC-allocated things. 2076 */ 2077 extern bool EqualStrings(JSContext* cx, JSString* str1, JSString* str2, 2078 bool* result); 2079 2080 /* Use the infallible method instead! 
 * (This fallible overload is deleted for linear strings.)
 */
extern bool EqualStrings(JSContext* cx, JSLinearString* str1,
                         JSLinearString* str2, bool* result) = delete;

/* EqualStrings is infallible on linear strings. */
extern bool EqualStrings(const JSLinearString* str1,
                         const JSLinearString* str2);

/**
 * Compare two strings that are known to be the same length.
 * Exposed for the JITs; for ordinary uses, EqualStrings() is more sensible.
 *
 * The caller must have checked for the following cases that can be handled
 * efficiently without requiring a character comparison:
 *   - str1 == str2
 *   - str1->length() != str2->length()
 *   - str1->isAtom() && str2->isAtom()
 */
extern bool EqualChars(const JSLinearString* str1, const JSLinearString* str2);

/*
 * Return less than, equal to, or greater than zero depending on whether
 * `s1[0..len1]` is less than, equal to, or greater than `s2`.
 */
extern int32_t CompareChars(const char16_t* s1, size_t len1,
                            const JSLinearString* s2);

/*
 * Compare two strings, like CompareChars, but store the result in `*result`.
 * This flattens the strings and therefore can fail; the boolean return value
 * reports success or failure.
 */
extern bool CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
                           int32_t* result);

/*
 * Compare two strings, like CompareChars.
 */
extern int32_t CompareStrings(const JSLinearString* str1,
                              const JSLinearString* str2);

/*
 * Compare two strings, like CompareChars. Can be called off-thread.
 */
extern int32_t CompareStrings(const JSOffThreadAtom* str1,
                              const JSOffThreadAtom* str2);

/**
 * Return true if the string contains only ASCII characters.
 */
extern bool StringIsAscii(const JSLinearString* str);

/*
 * Return true if the string matches the given sequence of ASCII bytes.
2133 */ 2134 extern bool StringEqualsAscii(const JSLinearString* str, 2135 const char* asciiBytes); 2136 /* 2137 * Return true if the string matches the given sequence of ASCII 2138 * bytes. The sequence of ASCII bytes must have length "length". The 2139 * length should not include the trailing null, if any. 2140 */ 2141 extern bool StringEqualsAscii(const JSLinearString* str, const char* asciiBytes, 2142 size_t length); 2143 2144 template <size_t N> 2145 bool StringEqualsLiteral(const JSLinearString* str, 2146 const char (&asciiBytes)[N]) { 2147 MOZ_ASSERT(asciiBytes[N - 1] == '\0'); 2148 return StringEqualsAscii(str, asciiBytes, N - 1); 2149 } 2150 2151 extern int StringFindPattern(const JSLinearString* text, 2152 const JSLinearString* pat, size_t start); 2153 2154 /** 2155 * Return true if the string contains a pattern at |start|. 2156 * 2157 * Precondition: `text` is long enough that this might be true; 2158 * that is, it has at least `start + pat->length()` characters. 2159 */ 2160 extern bool HasSubstringAt(const JSLinearString* text, 2161 const JSLinearString* pat, size_t start); 2162 2163 /* 2164 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt). 2165 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden 2166 * and constitute API misuse. 2167 */ 2168 JSString* SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, 2169 int32_t lengthInt); 2170 2171 inline js::HashNumber HashStringChars(const JSLinearString* str) { 2172 JS::AutoCheckCannotGC nogc; 2173 size_t len = str->length(); 2174 return str->hasLatin1Chars() 2175 ? mozilla::HashString(str->latin1Chars(nogc), len) 2176 : mozilla::HashString(str->twoByteChars(nogc), len); 2177 } 2178 2179 /** 2180 * Allocate string characters when the final string length is known in advance. 2181 */ 2182 template <typename CharT> 2183 class MOZ_NON_PARAM StringChars { 2184 static constexpr size_t InlineLength = 2185 std::is_same_v<CharT, JS::Latin1Char> 2186 ? 
JSFatInlineString::MAX_LENGTH_LATIN1 2187 : JSFatInlineString::MAX_LENGTH_TWO_BYTE; 2188 2189 CharT inlineChars_[InlineLength]; 2190 Rooted<JSString::OwnedChars<CharT>> ownedChars_; 2191 2192 #ifdef DEBUG 2193 // In debug mode, we keep track of the requested string lengths to ensure all 2194 // methods are called in the correct order and with the expected argument 2195 // values. 2196 size_t lastRequestedLength_ = 0; 2197 2198 void assertValidRequest(size_t expectedLastLength, size_t length) { 2199 MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length"); 2200 MOZ_ASSERT(lastRequestedLength_ == expectedLastLength); 2201 lastRequestedLength_ = length; 2202 } 2203 #else 2204 void assertValidRequest(size_t expectedLastLength, size_t length) {} 2205 #endif 2206 2207 public: 2208 explicit StringChars(JSContext* cx) : ownedChars_(cx) {} 2209 2210 /** 2211 * Return a raw pointer to the string characters. The pointer can point to 2212 * nursery allocated memory, so the caller should ensure no GC can happen 2213 * while using this pointer. 2214 */ 2215 CharT* data(const JS::AutoRequireNoGC&) { 2216 return ownedChars_ ? ownedChars_.data() : inlineChars_; 2217 } 2218 2219 /** 2220 * Escape hatch when it's not possible to call `data(nogc)`. Use with caution! 2221 */ 2222 CharT* unsafeData() { 2223 return ownedChars_ ? ownedChars_.data() : inlineChars_; 2224 } 2225 2226 /** 2227 * Prepare for writing |length| characters. Allocates iff `length` exceeds the 2228 * inline storage of this class. 2229 */ 2230 bool maybeAlloc(JSContext* cx, size_t length, 2231 gc::Heap heap = gc::Heap::Default); 2232 2233 /** 2234 * Increase the string characters storage. Allocates iff `newLength` exceeds 2235 * the inline storage of this class. 2236 */ 2237 bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength, 2238 gc::Heap heap = gc::Heap::Default); 2239 2240 /** 2241 * Build the result string. 
Does not deflate two-byte characters if all 2242 * characters fit into Latin-1. 2243 */ 2244 template <AllowGC allowGC> 2245 JSLinearString* toStringDontDeflate(JSContext* cx, size_t length, 2246 gc::Heap heap = gc::Heap::Default); 2247 2248 /** 2249 * Build the result string. Does not deflate two-byte characters if all 2250 * characters fit into Latin-1. And does not check static strings. 2251 */ 2252 template <AllowGC allowGC> 2253 JSLinearString* toStringDontDeflateNonStatic( 2254 JSContext* cx, size_t length, gc::Heap heap = gc::Heap::Default); 2255 }; 2256 2257 /** 2258 * Allocate atom characters when the final string length is known in advance. 2259 */ 2260 template <typename CharT> 2261 class MOZ_NON_PARAM AtomStringChars { 2262 static constexpr size_t InlineLength = 2263 std::is_same_v<CharT, JS::Latin1Char> 2264 ? JSFatInlineString::MAX_LENGTH_LATIN1 2265 : JSFatInlineString::MAX_LENGTH_TWO_BYTE; 2266 2267 CharT inlineChars_[InlineLength]; 2268 UniquePtr<CharT[], JS::FreePolicy> mallocChars_; 2269 2270 #ifdef DEBUG 2271 // In debug mode, we keep track of the requested string lengths to ensure all 2272 // methods are called in the correct order and with the expected argument 2273 // values. 2274 size_t lastRequestedLength_ = 0; 2275 2276 void assertValidRequest(size_t expectedLastLength, size_t length) { 2277 MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length"); 2278 MOZ_ASSERT(lastRequestedLength_ == expectedLastLength); 2279 lastRequestedLength_ = length; 2280 } 2281 #else 2282 void assertValidRequest(size_t expectedLastLength, size_t length) {} 2283 #endif 2284 2285 public: 2286 /** 2287 * Return a raw pointer to the string characters. 2288 */ 2289 CharT* data() { return mallocChars_ ? mallocChars_.get() : inlineChars_; } 2290 2291 /** 2292 * Prepare for writing |length| characters. Allocates iff `length` exceeds the 2293 * inline storage of this class. 
2294 */ 2295 bool maybeAlloc(JSContext* cx, size_t length); 2296 2297 /** 2298 * Build the result atom string. 2299 */ 2300 JSAtom* toAtom(JSContext* cx, size_t length); 2301 }; 2302 2303 /*** Conversions ************************************************************/ 2304 2305 /* 2306 * Convert a string to a printable C string. 2307 * 2308 * Asserts if the input contains any non-ASCII characters. 2309 */ 2310 UniqueChars EncodeAscii(JSContext* cx, JSString* str); 2311 2312 /* 2313 * Convert a string to a printable C string. 2314 */ 2315 UniqueChars EncodeLatin1(JSContext* cx, JSString* str); 2316 2317 enum class IdToPrintableBehavior : bool { 2318 /* 2319 * Request the printable representation of an identifier. 2320 */ 2321 IdIsIdentifier, 2322 2323 /* 2324 * Request the printable representation of a property key. 2325 */ 2326 IdIsPropertyKey 2327 }; 2328 2329 /* 2330 * Convert a jsid to a printable C string encoded in UTF-8. 2331 */ 2332 extern UniqueChars IdToPrintableUTF8(JSContext* cx, HandleId id, 2333 IdToPrintableBehavior behavior); 2334 2335 /* 2336 * Convert a non-string value to a string, returning null after reporting an 2337 * error, otherwise returning a new string reference. 2338 */ 2339 template <AllowGC allowGC> 2340 extern JSString* ToStringSlow( 2341 JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg); 2342 2343 /* 2344 * Convert the given value to a string. This method includes an inline 2345 * fast-path for the case where the value is already a string; if the value is 2346 * known not to be a string, use ToStringSlow instead. 2347 */ 2348 template <AllowGC allowGC> 2349 static MOZ_ALWAYS_INLINE JSString* ToString(JSContext* cx, JS::HandleValue v) { 2350 if (v.isString()) { 2351 return v.toString(); 2352 } 2353 return ToStringSlow<allowGC>(cx, v); 2354 } 2355 2356 /* 2357 * This function implements E-262-3 section 9.8, toString. Convert the given 2358 * value to a string of characters appended to the given builder. 
On error, the 2359 * passed builder may have partial results appended. 2360 */ 2361 inline bool ValueToStringBuilder(JSContext* cx, const Value& v, 2362 StringBuilder& sb); 2363 2364 } /* namespace js */ 2365 2366 MOZ_ALWAYS_INLINE bool JSString::getChar(JSContext* cx, size_t index, 2367 char16_t* code) { 2368 MOZ_ASSERT(index < length()); 2369 2370 /* 2371 * Optimization for one level deep ropes. 2372 * This is common for the following pattern: 2373 * 2374 * while() { 2375 * text = text.substr(0, x) + "bla" + text.substr(x) 2376 * test.charCodeAt(x + 1) 2377 * } 2378 * 2379 * Note: keep this in sync with MacroAssembler::loadStringChar and 2380 * CanAttachStringChar. 2381 */ 2382 JSString* str; 2383 if (isRope()) { 2384 JSRope* rope = &asRope(); 2385 if (uint32_t(index) < rope->leftChild()->length()) { 2386 str = rope->leftChild(); 2387 } else { 2388 str = rope->rightChild(); 2389 index -= rope->leftChild()->length(); 2390 } 2391 } else { 2392 str = this; 2393 } 2394 2395 if (!str->ensureLinear(cx)) { 2396 return false; 2397 } 2398 2399 *code = str->asLinear().latin1OrTwoByteChar(index); 2400 return true; 2401 } 2402 2403 MOZ_ALWAYS_INLINE bool JSString::getCodePoint(JSContext* cx, size_t index, 2404 char32_t* code) { 2405 // C++ implementation of https://tc39.es/ecma262/#sec-codepointat 2406 size_t size = length(); 2407 MOZ_ASSERT(index < size); 2408 2409 char16_t first; 2410 if (!getChar(cx, index, &first)) { 2411 return false; 2412 } 2413 if (!js::unicode::IsLeadSurrogate(first) || index + 1 == size) { 2414 *code = first; 2415 return true; 2416 } 2417 2418 char16_t second; 2419 if (!getChar(cx, index + 1, &second)) { 2420 return false; 2421 } 2422 if (!js::unicode::IsTrailSurrogate(second)) { 2423 *code = first; 2424 return true; 2425 } 2426 2427 *code = js::unicode::UTF16Decode(first, second); 2428 return true; 2429 } 2430 2431 MOZ_ALWAYS_INLINE JSLinearString* JSString::ensureLinear(JSContext* cx) { 2432 return isLinear() ? 
&asLinear() : asRope().flatten(cx); 2433 } 2434 2435 inline JSLinearString* JSString::base() const { 2436 MOZ_ASSERT(hasBase()); 2437 MOZ_ASSERT_IF(!isAtomRef(), !d.s.u3.base->isInline()); 2438 MOZ_ASSERT(d.s.u3.base->assertIsValidBase()); 2439 if (isAtomRef()) { 2440 return static_cast<JSLinearString*>(d.s.u3.atom); 2441 } 2442 return d.s.u3.base; 2443 } 2444 2445 inline JSAtom* JSString::atom() const { 2446 MOZ_ASSERT(isAtomRef()); 2447 return d.s.u3.atom; 2448 } 2449 2450 inline JSLinearString* JSString::nurseryBaseOrRelocOverlay() const { 2451 MOZ_ASSERT(hasBase()); 2452 return d.s.u3.base; 2453 } 2454 2455 inline bool JSString::canOwnDependentChars() const { 2456 // A string that could own the malloced chars used by another (dependent) 2457 // string. It will not have a base and must be linear and non-inline. 2458 return isLinear() && !isInline() && !hasBase(); 2459 } 2460 2461 template <> 2462 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::nonInlineChars( 2463 const JS::AutoRequireNoGC& nogc) const { 2464 return nonInlineTwoByteChars(nogc); 2465 } 2466 2467 template <> 2468 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::nonInlineChars( 2469 const JS::AutoRequireNoGC& nogc) const { 2470 return nonInlineLatin1Chars(nogc); 2471 } 2472 2473 template <> 2474 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::chars( 2475 const JS::AutoRequireNoGC& nogc) const { 2476 return rawTwoByteChars(); 2477 } 2478 2479 template <> 2480 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::chars( 2481 const JS::AutoRequireNoGC& nogc) const { 2482 return rawLatin1Chars(); 2483 } 2484 2485 template <> 2486 MOZ_ALWAYS_INLINE js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> 2487 JSRope::copyChars<JS::Latin1Char>(JSContext* maybecx, 2488 arena_id_t destArenaId) const { 2489 return copyLatin1Chars(maybecx, destArenaId); 2490 } 2491 2492 template <> 2493 MOZ_ALWAYS_INLINE JS::UniqueTwoByteChars JSRope::copyChars<char16_t>( 2494 JSContext* maybecx, arena_id_t destArenaId) 
const { 2495 return copyTwoByteChars(maybecx, destArenaId); 2496 } 2497 2498 template <> 2499 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<JS::Latin1Char>( 2500 size_t length) { 2501 return length <= MAX_LENGTH_LATIN1; 2502 } 2503 2504 template <> 2505 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<char16_t>(size_t length) { 2506 return length <= MAX_LENGTH_TWO_BYTE; 2507 } 2508 2509 template <> 2510 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<JS::Latin1Char>( 2511 size_t length) { 2512 static_assert( 2513 (INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellAlignBytes == 2514 0, 2515 "fat inline strings' Latin1 characters don't exactly " 2516 "fill subsequent cells and thus are wasteful"); 2517 static_assert(MAX_LENGTH_LATIN1 == 2518 (sizeof(JSFatInlineString) - 2519 offsetof(JSFatInlineString, d.inlineStorageLatin1)) / 2520 sizeof(char), 2521 "MAX_LENGTH_LATIN1 must be one less than inline Latin1 " 2522 "storage count"); 2523 2524 return length <= MAX_LENGTH_LATIN1; 2525 } 2526 2527 template <> 2528 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<char16_t>(size_t length) { 2529 static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) % 2530 js::gc::CellAlignBytes == 2531 0, 2532 "fat inline strings' char16_t characters don't exactly " 2533 "fill subsequent cells and thus are wasteful"); 2534 static_assert(MAX_LENGTH_TWO_BYTE == 2535 (sizeof(JSFatInlineString) - 2536 offsetof(JSFatInlineString, d.inlineStorageTwoByte)) / 2537 sizeof(char16_t), 2538 "MAX_LENGTH_TWO_BYTE must be one less than inline " 2539 "char16_t storage count"); 2540 2541 return length <= MAX_LENGTH_TWO_BYTE; 2542 } 2543 2544 template <> 2545 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<JS::Latin1Char>( 2546 size_t length) { 2547 // If it fits in a fat inline string, it fits in any inline string. 
2548 return JSFatInlineString::lengthFits<JS::Latin1Char>(length); 2549 } 2550 2551 template <> 2552 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<char16_t>(size_t length) { 2553 // If it fits in a fat inline string, it fits in any inline string. 2554 return JSFatInlineString::lengthFits<char16_t>(length); 2555 } 2556 2557 template <> 2558 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<JS::Latin1Char>( 2559 size_t length) { 2560 return length <= MAX_LENGTH_LATIN1; 2561 } 2562 2563 template <> 2564 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<char16_t>(size_t length) { 2565 return length <= MAX_LENGTH_TWO_BYTE; 2566 } 2567 2568 template <> 2569 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<JS::Latin1Char>( 2570 size_t length) { 2571 return length <= MAX_LENGTH_LATIN1; 2572 } 2573 2574 template <> 2575 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<char16_t>(size_t length) { 2576 return length <= MAX_LENGTH_TWO_BYTE; 2577 } 2578 2579 template <> 2580 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<JS::Latin1Char>(size_t length) { 2581 // If it fits in a fat inline atom, it fits in any inline atom. 2582 return js::FatInlineAtom::lengthFits<JS::Latin1Char>(length); 2583 } 2584 2585 template <> 2586 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<char16_t>(size_t length) { 2587 // If it fits in a fat inline atom, it fits in any inline atom. 
2588 return js::FatInlineAtom::lengthFits<char16_t>(length); 2589 } 2590 2591 template <> 2592 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const char16_t* chars, 2593 bool usesStringBuffer) { 2594 // Check that the new buffer is located in the StringBufferArena 2595 if (!(isAtomRef() && atom()->isInline())) { 2596 checkStringCharsArena(chars, usesStringBuffer); 2597 } 2598 d.s.u2.nonInlineCharsTwoByte = chars; 2599 } 2600 2601 template <> 2602 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const JS::Latin1Char* chars, 2603 bool usesStringBuffer) { 2604 // Check that the new buffer is located in the StringBufferArena 2605 if (!(isAtomRef() && atom()->isInline())) { 2606 checkStringCharsArena(chars, usesStringBuffer); 2607 } 2608 d.s.u2.nonInlineCharsLatin1 = chars; 2609 } 2610 2611 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::rawLatin1Chars() const { 2612 MOZ_ASSERT(JSString::isLinear()); 2613 MOZ_ASSERT(hasLatin1Chars()); 2614 return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1; 2615 } 2616 2617 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::rawTwoByteChars() const { 2618 MOZ_ASSERT(JSString::isLinear()); 2619 MOZ_ASSERT(hasTwoByteChars()); 2620 return isInline() ? 
d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte; 2621 } 2622 2623 inline js::PropertyName* JSAtom::asPropertyName() { 2624 MOZ_ASSERT(!isIndex()); 2625 return static_cast<js::PropertyName*>(this); 2626 } 2627 2628 inline bool JSLinearString::isIndex(uint32_t* indexp) const { 2629 MOZ_ASSERT(JSString::isLinear()); 2630 2631 if (isAtom()) { 2632 return asAtom().isIndex(indexp); 2633 } 2634 2635 if (JSString::hasIndexValue()) { 2636 *indexp = getIndexValue(); 2637 return true; 2638 } 2639 2640 return isIndexSlow(indexp); 2641 } 2642 2643 namespace js { 2644 namespace gc { 2645 template <> 2646 inline JSString* Cell::as<JSString>() { 2647 MOZ_ASSERT(is<JSString>()); 2648 return reinterpret_cast<JSString*>(this); 2649 } 2650 2651 template <> 2652 inline JSString* TenuredCell::as<JSString>() { 2653 MOZ_ASSERT(is<JSString>()); 2654 return reinterpret_cast<JSString*>(this); 2655 } 2656 2657 } // namespace gc 2658 } // namespace js 2659 2660 #endif /* vm_StringType_h */