tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

StringType.h (93463B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef vm_StringType_h
      8 #define vm_StringType_h
      9 
     10 #include "mozilla/Maybe.h"
     11 #include "mozilla/MemoryReporting.h"
     12 #include "mozilla/Range.h"
     13 #include "mozilla/RefPtr.h"
     14 #include "mozilla/Span.h"
     15 #include "mozilla/StringBuffer.h"
     16 #include "mozilla/TextUtils.h"
     17 
     18 #include <string_view>  // std::basic_string_view
     19 
     20 #include "jstypes.h"  // js::Bit
     21 
     22 #include "gc/Cell.h"
     23 #include "gc/MaybeRooted.h"
     24 #include "gc/Nursery.h"
     25 #include "gc/RelocationOverlay.h"
     26 #include "gc/StoreBuffer.h"
     27 #include "js/CharacterEncoding.h"
     28 #include "js/RootingAPI.h"
     29 #include "js/shadow/String.h"  // JS::shadow::String
     30 #include "js/String.h"         // JS::MaxStringLength
     31 #include "js/UniquePtr.h"
     32 #include "util/Text.h"
     33 
     34 class JSDependentString;
     35 class JSExtensibleString;
     36 class JSExternalString;
     37 class JSInlineString;
     38 class JSRope;
     39 
     40 namespace JS {
     41 class JS_PUBLIC_API AutoStableStringChars;
     42 }  // namespace JS
     43 
     44 namespace js {
     45 
     46 class ArrayObject;
     47 class JS_PUBLIC_API GenericPrinter;
     48 class JSONPrinter;
     49 class PropertyName;
     50 class StringBuilder;
     51 class JSOffThreadAtom;
     52 
     53 namespace frontend {
     54 class ParserAtomsTable;
     55 class TaggedParserAtomIndex;
     56 class WellKnownParserAtoms;
     57 struct CompilationAtomCache;
     58 }  // namespace frontend
     59 
     60 namespace jit {
     61 class MacroAssembler;
     62 }  // namespace jit
     63 
     64 /* Buffer length for any uint32 in decimal, excluding the null terminator. */
     65 static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;
     66 
     67 // Maximum array index. This value is defined in the spec (ES2021 draft, 6.1.7):
     68 //
     69 //   An array index is an integer index whose numeric value i is in the range
     70 //   +0𝔽 ≤ i < 𝔽(2^32 - 1).
     71 const uint32_t MAX_ARRAY_INDEX = 4294967294u;  // 2^32-2 (= UINT32_MAX-1)
     72 
     73 // Returns true if the `length` characters at `s` store an unsigned 32-bit
     74 // integer value less than or equal to MAX_ARRAY_INDEX, initializing `*indexp`
     75 // to that value if so. Leading '0' isn't allowed except 0 itself.
     76 template <typename CharT>
     77 bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp);
     78 
     79 } /* namespace js */
     80 
     81 // clang-format off
     82 /*
     83 * [SMDOC] JavaScript Strings
     84 *
     85 * Conceptually, a JS string is just an array of chars and a length. This array
     86 * of chars may or may not be null-terminated and, if it is, the null character
     87 * is not included in the length.
     88 *
     89 * To improve performance of common operations, the following optimizations are
     90 * made which affect the engine's representation of strings:
     91 *
     92 *  - The plain vanilla representation is a "linear" string which consists of a
     93 *    string header in the GC heap and a malloc'd char array.
     94 *
     95 *  - To avoid copying a substring of an existing "base" string, a "dependent"
     96 *    string (JSDependentString) can be created which points into the base
     97 *    string's char array.
     98 *
     99 *  - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
    100 *    to represent a delayed string concatenation. Concatenation (called
    101 *    flattening) is performed if and when a linear char array is requested. In
    102 *    general, ropes form a binary dag whose internal nodes are JSRope string
    103 *    headers with no associated char array and whose leaf nodes are linear
    104 *    strings.
    105 *
    106 *  - To avoid copying the leftmost string when flattening, we may produce an
    107 *    "extensible" string, which tracks not only its actual length but also its
    108 *    buffer's overall size. If such an "extensible" string appears as the
    109 *    leftmost string in a subsequent flatten, and its buffer has enough unused
    110 *    space, we can simply flatten the rest of the ropes into its buffer,
    111 *    leaving its text in place. We then transfer ownership of its buffer to the
    112 *    flattened rope, and mutate the donor extensible string into a dependent
    113 *    string referencing its original buffer.
    114 *
    115 *    (The term "extensible" does not imply that we ever 'realloc' the buffer.
    116 *    Extensible strings may have dependent strings pointing into them, and the
    117 *    JSAPI hands out pointers to linear strings' buffers, so resizing with
    118 *    'realloc' is generally not possible.)
    119 *
    120 *  - To avoid allocating small char arrays, short strings can be stored inline
    121 *    in the string header (JSInlineString). These come in two flavours:
    122 *    JSThinInlineString, which is the same size as JSString; and
    123 *    JSFatInlineString, which has a larger header and so can fit more chars.
    124 *
    125 *  - To avoid O(n) string equality comparisons, strings can be
    126 *    canonicalized to "atoms" (JSAtom) such that there is a single atom with a
    127 *    given (length,chars).
    128 *
    129 *  - To avoid copying all strings created through the JSAPI, an "external"
    130 *    string (JSExternalString) can be created whose chars are managed by the
    131 *    JSAPI client.
    132 *
    133 *  - To avoid using two bytes per character for every string, string
    134 *    characters are stored as Latin1 instead of TwoByte if all characters are
    135 *    representable in Latin1.
    136 *
    137 *  - To avoid slow conversions from strings to integer indexes, we cache 16 bit
    138 *    unsigned indexes on strings representing such numbers.
    139 *
    140 * Although all strings share the same basic memory layout, we can conceptually
    141 * arrange them into a hierarchy of operations/invariants and represent this
    142 * hierarchy in C++ with classes:
    143 *
    144 * C++ type                     operations+fields / invariants+properties
    145 * ==========================   =========================================
    146 * JSString (abstract)          get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
    147 *  | \
    148 *  | JSRope                    leftChild, rightChild / -
    149 *  |
    150 * JSLinearString               latin1Chars, twoByteChars / -
    151 *  |
    152 *  +-- JSDependentString       base / -
    153 *  |   |
    154 *  |   +-- JSAtomRefString     - / base points to an atom
    155 *  |
    156 *  +-- JSExternalString        - / char array memory managed by embedding
    157 *  |
    158 *  +-- JSExtensibleString      - / tracks total buffer capacity (including current text)
    159 *  |
    160 *  +-- JSInlineString (abstract) - / chars stored in header
    161 *  |   |
    162 *  |   +-- JSThinInlineString  - / header is normal
    163 *  |   |
    164 *  |   +-- JSFatInlineString   - / header is fat
    165 *  |
    166 * JSAtom (abstract)            - / string equality === pointer equality
    167 *  |  |
    168 *  |  +-- js::NormalAtom       JSLinearString + atom hash code / -
    169 *  |  |   |
    170 *  |  |   +-- js::ThinInlineAtom
    171 *  |  |                        possibly larger JSThinInlineString + atom hash code / -
    172 *  |  |
    173 *  |  +-- js::FatInlineAtom    JSFatInlineString w/atom hash code / -
    174 *  |
    175 * js::PropertyName             - / chars don't contain an index (uint32_t)
    176 *
    177 * Classes marked with (abstract) above are not literally C++ Abstract Base
    178 * Classes (since there are no virtual functions, pure or not, in this
    179 * hierarchy), but have the same meaning: there are no strings with this type as
    180 * its most-derived type.
    181 *
    182 * Atoms can additionally be permanent, i.e. unable to be collected, and can
    183 * be combined with other string types to create additional most-derived types
    184 * that satisfy the invariants of more than one of the abovementioned
    185 * most-derived types. Furthermore, each atom stores a hash number (based on its
    186 * chars). This hash number is used as key in the atoms table and when the atom
    187 * is used as key in a JS Map/Set.
    188 *
    189 * Derived string types can be queried from ancestor types via isX() and
    190 * retrieved with asX() debug-only-checked casts.
    191 *
    192 * The ensureX() operations mutate 'this' in place to effectively make the type
    193 * be at least X (e.g., ensureLinear will change a JSRope to be a JSLinearString).
    194 */
    195 // clang-format on
    196 
    197 class JSString : public js::gc::CellWithLengthAndFlags {
    198 protected:
    199  using Base = js::gc::CellWithLengthAndFlags;
    200 
         // Inline character capacity for each char width: the number of chars that
         // fit in two pointer-sized words. staticAsserts() checks that inline storage
         // of this many chars ends exactly at sizeof(JSString).
    201  static const size_t NUM_INLINE_CHARS_LATIN1 =
    202      2 * sizeof(void*) / sizeof(JS::Latin1Char);
    203  static const size_t NUM_INLINE_CHARS_TWO_BYTE =
    204      2 * sizeof(void*) / sizeof(char16_t);
    205 
    206 public:
    207  // String length and flags are stored in the cell header.
         // length() is the string's length in chars; flags() is the flag word whose
         // layout is described under "Flag Encoding" below.
    208  MOZ_ALWAYS_INLINE
    209  size_t length() const { return headerLengthField(); }
    210  MOZ_ALWAYS_INLINE
    211  uint32_t flags() const { return headerFlagsField(); }
    212 
    213  // Class for temporarily holding character data that will be used for JSString
    214  // contents. The data may be allocated in the nursery, the malloc heap, or as
    215  // a StringBuffer. The class instance must be passed to the JSString
    216  // constructor as a MutableHandle, so that if a GC occurs between the
    217  // construction of the content and the construction of the JSString Cell to
    218  // hold it, the contents can be transparently moved to the malloc heap before
    219  // the nursery is reset.
         //
         // OwnedChars is move-only: copy construction and copy assignment are deleted,
         // and the destructor calls reset() to discard anything still owned.
    220  template <typename CharT>
    221  class OwnedChars {
    222   public:
    223    enum class Kind {
    224      // Not owning any chars. chars_ should not be used.
    225      Uninitialized,
    226 
    227      // chars_ is a buffer allocated in the nursery.
    228      Nursery,
    229 
    230      // chars_ is a buffer allocated in the malloc heap. This pointer should be
    231      // passed to js_free() if OwnedChars dies while still possessing
    232      // ownership.
    233      Malloc,
    234 
    235      // chars_ is allocated as a refcounted StringBuffer. The reference must be
    236      // released if OwnedChars dies while still possessing ownership.
    237      StringBuffer,
    238    };
    239 
    240   private:
           // View of the held characters; only meaningful when kind_ is not
           // Kind::Uninitialized.
    241    mozilla::Span<CharT> chars_;
           // How chars_ was allocated (see Kind); a default-constructed instance owns
           // nothing.
    242    Kind kind_ = Kind::Uninitialized;
    243 
    244   public:
    245    OwnedChars() = default;
    246    OwnedChars(CharT* chars, size_t length, Kind kind);
    247    OwnedChars(js::UniquePtr<CharT[], JS::FreePolicy>&& chars, size_t length);
    248    OwnedChars(RefPtr<mozilla::StringBuffer>&& buffer, size_t length);
    249    OwnedChars(OwnedChars&&);
    250    OwnedChars(const OwnedChars&) = delete;
    251    ~OwnedChars() { reset(); }
    252 
    253    OwnedChars& operator=(OwnedChars&&);
    254    OwnedChars& operator=(const OwnedChars&) = delete;
    255 
           // True when chars are held. Asserts that held chars are never empty.
    256    explicit operator bool() const {
    257      MOZ_ASSERT_IF(kind_ != Kind::Uninitialized, !chars_.empty());
    258      return kind_ != Kind::Uninitialized;
    259    }
           // Accessors for the held chars. Each asserts that this instance is
           // initialized.
    260    mozilla::Span<CharT> span() const {
    261      MOZ_ASSERT(kind_ != Kind::Uninitialized);
    262      return chars_;
    263    }
    264    CharT* data() const {
    265      MOZ_ASSERT(kind_ != Kind::Uninitialized);
    266      return chars_.data();
    267    }
    268    size_t length() const {
    269      MOZ_ASSERT(kind_ != Kind::Uninitialized);
    270      return chars_.Length();
    271    }
           // Size of the held chars in bytes (length() scaled by the char width).
    272    size_t size() const { return length() * sizeof(CharT); }
    273    bool isMalloced() const { return kind_ == Kind::Malloc; }
    274    bool hasStringBuffer() const { return kind_ == Kind::StringBuffer; }
    275 
    276    // Return the data and release ownership to the caller.
    277    inline CharT* release();
    278    // Discard any owned data.
    279    inline void reset();
    280    // Move any nursery data into the malloc heap.
    281    inline void ensureNonNursery();
    282 
    283    // If we GC with a live OwnedChars, copy the data out of the nursery to a
    284    // safely malloced location.
    285    void trace(JSTracer* trc) { ensureNonNursery(); }
    286  };
    287 
    288 protected:
    289  /* Fields only apply to string types commented on the right. */
    290  struct Data {
    291    // Note: 32-bit length and flags fields are inherited from
    292    // CellWithLengthAndFlags.
    293 
           // The payload is one of two overlapping layouts: characters stored inline,
           // or the two-member struct `s` (u2 and u3) whose interpretation depends on
           // the string type.
    294    union {
    295      union {
    296        /* JS(Fat)InlineString */
    297        JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
    298        char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
    299      };
    300      struct {
               // u2: chars pointer for non-inline linear strings, the left child of a
               // rope, or the parent link used while flattening.
    301        union {
    302          const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except
    303                                                         JS(Fat)InlineString */
    304          const char16_t* nonInlineCharsTwoByte;      /* JSLinearString, except
    305                                                         JS(Fat)InlineString */
    306          JSString* left;                             /* JSRope */
    307          JSRope* parent;                             /* Used in flattening */
    308        } u2;
               // u3: per-type second field (base string, atom, right child, buffer
               // capacity, or external callbacks).
    309        union {
    310          JSLinearString* base; /* JSDependentString */
    311          JSAtom* atom;         /* JSAtomRefString */
    312          JSString* right;      /* JSRope */
    313          size_t capacity;      /* JSLinearString (extensible) */
    314          const JSExternalStringCallbacks*
    315              externalCallbacks; /* JSExternalString */
    316        } u3;
    317      } s;
    318    };
    319  } d;
    320 
    321 public:
    322  /* Flags exposed only for jits */
    323 
    324  /*
    325   * Flag Encoding
    326   *
    327   * The first word of a JSString stores flags, index, and (on some
    328   * platforms) the length. The flags store both the string's type and its
    329   * character encoding.
    330   *
    331   * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
    332   * instead of TwoByte. This flag can also be set for ropes, if both the
    333   * left and right nodes are Latin1. Flattening will result in a Latin1
    334   * string in this case. When we flatten a TwoByte rope, we turn child ropes
    335   * (including Latin1 ropes) into TwoByte dependent strings. If one of these
    336   * strings is also part of another Latin1 rope tree, we can have a Latin1 rope
    337   * with a TwoByte descendent.
    338   *
    339   * The other flags store the string's type. Instead of using a dense index
    340   * to represent the most-derived type, string types are encoded to allow
    341   * single-op tests for hot queries (isRope, isDependent, isAtom) which, in
    342   * view of subtyping, would require slower (isX() || isY() || isZ()).
    343   *
    344   * The string type encoding can be summarized as follows. The "instance
    345   * encoding" entry for a type specifies the flag bits used to create a
    346   * string instance of that type. Abstract types have no instances and thus
    347   * have no such entry. The "subtype predicate" entry for a type specifies
    348   * the predicate used to query whether a JSString instance is subtype
    349   * (reflexively) of that type.
    350   *
    351   *   String         Instance        Subtype
    352   *   type           encoding        predicate
    353   *   -----------------------------------------
    354   *   Rope           0000000 000     xxxxx0x xxx
    355   *   Linear         0000010 000     xxxxx1x xxx
    356   *   Dependent      0000110 000     xxxx1xx xxx
    357   *   AtomRef        1000110 000     1xxxxxx xxx
    358   *   External       0100010 000     x100010 xxx
    359   *   Extensible     0010010 000     x010010 xxx
    360   *   Inline         0001010 000     xxx1xxx xxx
    361   *   FatInline      0011010 000     xx11xxx xxx
    362   *   JSAtom         -               xxxxxx1 xxx
    363   *   NormalAtom     0000011 000     xxx0xx1 xxx
    364   *   PermanentAtom  0100011 000     x1xxxx1 xxx
    365   *   ThinInlineAtom 0001011 000     xx01xx1 xxx
    366   *   FatInlineAtom  0011011 000     xx11xx1 xxx
    367   *                                  ||||||| |||
    368   *                                  ||||||| ||\- [0] reserved (FORWARD_BIT)
    369   *                                  ||||||| |\-- [1] reserved
    370   *                                  ||||||| \--- [2] reserved
    371   *                                  ||||||\----- [3] IsAtom
    372   *                                  |||||\------ [4] IsLinear
    373   *                                  ||||\------- [5] IsDependent
    374   *                                  |||\-------- [6] IsInline
    375   *                                  ||\--------- [7] FatInlineAtom/Extensible
    376   *                                  |\---------- [8] External/Permanent
    377   *                                  \----------- [9] AtomRef
    378   *
    379   * Bits 0..2 are reserved for use by the GC (see
    380   * gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
    381   * FORWARD_BIT for forwarded nursery cells. The other 2 bits are currently
    382   * unused.
    383   *
    384   * Note that the first 4 flag bits 3..6 (from right to left in the previous
    385   * table) have the following meaning and can be used for some hot queries:
    386   *
    387   *   Bit 3: IsAtom (Atom, PermanentAtom)
    388   *   Bit 4: IsLinear
    389   *   Bit 5: IsDependent
    390   *   Bit 6: IsInline (Inline, FatInline, ThinInlineAtom, FatInlineAtom)
    391   *
    392   * If INDEX_VALUE_BIT is set, bits 16 and up will also hold an integer index.
    393   */
    394 
    395  // The low bits of the flag word are reserved by the GC; string type bits
         // therefore start at bit 3.
    396  static_assert(js::gc::CellFlagBitsReservedForGC <= 3,
    397                "JSString::flags must reserve enough bits for Cell");
    398 
    399  static const uint32_t ATOM_BIT = js::Bit(3);
    400  static const uint32_t LINEAR_BIT = js::Bit(4);
    401  static const uint32_t DEPENDENT_BIT = js::Bit(5);
    402  static const uint32_t INLINE_CHARS_BIT = js::Bit(6);
    403  // Indicates a dependent string pointing to an atom. This is the same bit as
         // ATOM_IS_INDEX_BIT, which applies to atoms instead (see isAtomRef()).
    404  static const uint32_t ATOM_REF_BIT = js::Bit(9);
    405 
         // Bit 7 is shared: "extensible" for linear strings, "fat" for inline ones.
    406  static const uint32_t LINEAR_IS_EXTENSIBLE_BIT = js::Bit(7);
    407  static const uint32_t INLINE_IS_FAT_BIT = js::Bit(7);
    408 
         // Bit 8 is shared: "external" for linear strings, "permanent" for atoms.
    409  static const uint32_t LINEAR_IS_EXTERNAL_BIT = js::Bit(8);
    410  static const uint32_t ATOM_IS_PERMANENT_BIT = js::Bit(8);
    411 
         // Exact type-bit patterns compared against by isExtensible()/isExternal().
    412  static const uint32_t EXTENSIBLE_FLAGS =
    413      LINEAR_BIT | LINEAR_IS_EXTENSIBLE_BIT;
    414  static const uint32_t EXTERNAL_FLAGS = LINEAR_BIT | LINEAR_IS_EXTERNAL_BIT;
    415 
         // Both bits must be set for a string to be a fat inline string.
    416  static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | INLINE_IS_FAT_BIT;
    417 
    418  /* Initial flags for various types of strings. */
    419  static const uint32_t INIT_THIN_INLINE_FLAGS = LINEAR_BIT | INLINE_CHARS_BIT;
    420  static const uint32_t INIT_FAT_INLINE_FLAGS = LINEAR_BIT | FAT_INLINE_MASK;
    421  static const uint32_t INIT_ROPE_FLAGS = 0;
    422  static const uint32_t INIT_LINEAR_FLAGS = LINEAR_BIT;
    423  static const uint32_t INIT_DEPENDENT_FLAGS = LINEAR_BIT | DEPENDENT_BIT;
    424  static const uint32_t INIT_ATOM_REF_FLAGS =
    425      INIT_DEPENDENT_FLAGS | ATOM_REF_BIT;
    426 
         // Mask of the string type bits. NOTE(review): assuming js::BitMask(n) is the
         // mask of the low n bits, this covers bits 3..9 -- confirm in jstypes.h.
    427  static const uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
    428  static_assert((TYPE_FLAGS_MASK & js::gc::HeaderWord::RESERVED_MASK) == 0,
    429                "GC reserved bits must not be used for Strings");
    430 
    431  // Whether this atom's characters store an uint32 index value less than or
    432  // equal to MAX_ARRAY_INDEX. This bit means something different if the
    433  // string is not an atom (see ATOM_REF_BIT).
    434  // See JSLinearString::isIndex.
    435  static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(9);
    436 
    437  // Linear strings:
    438  // - Content and representation are Latin-1 characters.
    439  // - Unmodifiable after construction.
    440  //
    441  // Ropes:
    442  // - Content are Latin-1 characters.
    443  // - Flag may be cleared when the rope is changed into a dependent string.
    444  //
    445  // Also see LATIN1_CHARS_BIT description under "Flag Encoding".
    446  static const uint32_t LATIN1_CHARS_BIT = js::Bit(10);
    447 
         // If INDEX_VALUE_BIT is set, the flag bits from INDEX_VALUE_SHIFT upwards
         // hold a cached integer index (read back by getIndexValue()).
    448  static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
    449  static const uint32_t INDEX_VALUE_SHIFT = 16;
    450 
    451  // Whether this is a non-inline linear string with a refcounted
    452  // mozilla::StringBuffer.
    453  //
    454  // If set, d.s.u2.nonInlineChars* still points to the string's characters and
    455  // the StringBuffer header is stored immediately before the characters. This
    456  // allows recovering the StringBuffer from the chars pointer with
    457  // StringBuffer::FromData.
    458  static const uint32_t HAS_STRING_BUFFER_BIT = js::Bit(12);
    459 
    460  // NON_DEDUP_BIT is used in string deduplication during tenuring. This bit is
    461  // shared with both FLATTEN_FINISH_NODE and PINNED_ATOM_BIT, since it
    462  // only applies to linear non-atoms.
    463  static const uint32_t NON_DEDUP_BIT = js::Bit(15);
    464 
    465  // If IN_STRING_TO_ATOM_CACHE is set, this string had an entry in the
    466  // StringToAtomCache at some point. Note that GC can purge the cache without
    467  // clearing this bit.
    468  static const uint32_t IN_STRING_TO_ATOM_CACHE = js::Bit(13);
    469 
    470  // Flags used during rope flattening that indicate what action to perform when
    471  // returning to the rope's parent rope.
    472  static const uint32_t FLATTEN_VISIT_RIGHT = js::Bit(14);
    473  static const uint32_t FLATTEN_FINISH_NODE = js::Bit(15);
    474  static const uint32_t FLATTEN_MASK =
    475      FLATTEN_VISIT_RIGHT | FLATTEN_FINISH_NODE;
    476 
    477  // Indicates that this string is depended on by another string. A rope should
    478  // never be depended on, and this should never be set during flattening, so
    479  // we can reuse the FLATTEN_VISIT_RIGHT bit.
    480  static const uint32_t DEPENDED_ON_BIT = FLATTEN_VISIT_RIGHT;
    481 
         // PINNED_ATOM_BIT uses bit 15, the same bit as NON_DEDUP_BIT and
         // FLATTEN_FINISH_NODE.
    482  static const uint32_t PINNED_ATOM_BIT = js::Bit(15);
    483  static const uint32_t PERMANENT_ATOM_MASK =
    484      ATOM_BIT | PINNED_ATOM_BIT | ATOM_IS_PERMANENT_BIT;
    485 
         // Maximum string length, taken from the public JS::MaxStringLength.
    486  static const uint32_t MAX_LENGTH = JS::MaxStringLength;
    487 
         // Largest char value representable in a Latin1 string.
    488  static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;
    489 
    490  // Allocate a StringBuffer instead of using raw malloc for strings with
    491  // length * sizeof(CharT) >= MIN_BYTES_FOR_BUFFER.
    492  //
    493  // StringBuffers can be shared more efficiently with DOM code, but have some
    494  // additional overhead (StringBuffer header, null terminator) so for short
    495  // strings we prefer malloc.
    496  //
    497  // Note that 514 was picked as a pretty conservative initial value. The value
    498  // is just above 512 to ensure a Latin1 string of length 512 isn't bumped
    499  // from jemalloc bucket size 512 to size 768. It's an even value because it's
    500  // divided by 2 for char16_t strings.
    501  static constexpr size_t MIN_BYTES_FOR_BUFFER = 514;
    502 
    503  /*
    504   * Helper function to validate that a string of a given length is
    505   * representable by a JSString. An allocation overflow is reported if false
    506   * is returned.
    507   */
    508  static inline bool validateLength(JSContext* cx, size_t length);
    509 
         // Variant of validateLength parameterized on js::AllowGC; defined elsewhere.
    510  template <js::AllowGC allowGC>
    511  static inline bool validateLengthInternal(JSContext* cx, size_t length);
    512 
         // Offsets of the flags and length fields, forwarded from the cell header.
    513  static constexpr size_t offsetOfFlags() { return offsetOfHeaderFlags(); }
    514  static constexpr size_t offsetOfLength() { return offsetOfHeaderLength(); }
    515 
         // True if `other` has the same length and an identical flag word. Character
         // contents are not compared.
    516  bool sameLengthAndFlags(const JSString& other) const {
    517    return length() == other.length() && flags() == other.flags();
    518  }
    519 
    520  static void staticAsserts() {
           // This body contains only compile-time checks: size/offset invariants of
           // JSString itself, then layout and flag agreement with JS::shadow::String.
    521    static_assert(JSString::MAX_LENGTH < UINT32_MAX,
    522                  "Length must fit in 32 bits");
           // Inline storage of either char width must end exactly at the end of the
           // string header.
    523    static_assert(
    524        sizeof(JSString) == (offsetof(JSString, d.inlineStorageLatin1) +
    525                             NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
    526        "Inline Latin1 chars must fit in a JSString");
    527    static_assert(
    528        sizeof(JSString) == (offsetof(JSString, d.inlineStorageTwoByte) +
    529                             NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
    530        "Inline char16_t chars must fit in a JSString");
    531 
    532    /* Ensure js::shadow::String has the same layout. */
    533    using JS::shadow::String;
    534    static_assert(
    535        JSString::offsetOfRawHeaderFlagsField() == offsetof(String, flags_),
    536        "shadow::String flags offset must match JSString");
           // The length offset is only compared when JS_BITS_PER_WORD is 32.
    537 #if JS_BITS_PER_WORD == 32
    538    static_assert(JSString::offsetOfLength() == offsetof(String, length_),
    539                  "shadow::String length offset must match JSString");
    540 #endif
    541    static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) ==
    542                      offsetof(String, nonInlineCharsLatin1),
    543                  "shadow::String nonInlineChars offset must match JSString");
    544    static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
    545                      offsetof(String, nonInlineCharsTwoByte),
    546                  "shadow::String nonInlineChars offset must match JSString");
    547    static_assert(
    548        offsetof(JSString, d.s.u3.externalCallbacks) ==
    549            offsetof(String, externalCallbacks),
    550        "shadow::String externalCallbacks offset must match JSString");
    551    static_assert(offsetof(JSString, d.inlineStorageLatin1) ==
    552                      offsetof(String, inlineStorageLatin1),
    553                  "shadow::String inlineStorage offset must match JSString");
    554    static_assert(offsetof(JSString, d.inlineStorageTwoByte) ==
    555                      offsetof(String, inlineStorageTwoByte),
    556                  "shadow::String inlineStorage offset must match JSString");
           // Flag constants mirrored in shadow::String must have identical values.
    557    static_assert(ATOM_BIT == String::ATOM_BIT,
    558                  "shadow::String::ATOM_BIT must match JSString::ATOM_BIT");
    559    static_assert(LINEAR_BIT == String::LINEAR_BIT,
    560                  "shadow::String::LINEAR_BIT must match JSString::LINEAR_BIT");
    561    static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
    562                  "shadow::String::INLINE_CHARS_BIT must match "
    563                  "JSString::INLINE_CHARS_BIT");
    564    static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
    565                  "shadow::String::LATIN1_CHARS_BIT must match "
    566                  "JSString::LATIN1_CHARS_BIT");
    567    static_assert(
    568        TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
    569        "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
    570    static_assert(
    571        EXTERNAL_FLAGS == String::EXTERNAL_FLAGS,
    572        "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS");
    573  }
    574 
    575  /* Avoid silly compile errors in JSRope::flatten */
    576  friend class JSRope;
    577 
    578  friend class js::gc::RelocationOverlay;
    579 
    580 protected:
    581  template <typename CharT>
         // Declared here, defined elsewhere. NOTE(review): presumably stores `chars`
         // as the non-inline chars pointer and records whether it is backed by a
         // StringBuffer -- confirm against the definition.
    582  MOZ_ALWAYS_INLINE void setNonInlineChars(const CharT* chars,
    583                                           bool usesStringBuffer);
    584 
         // Debug-only sanity check on where `chars` was allocated. Compiles to an
         // empty function unless MOZ_DEBUG is defined.
    585  template <typename CharT>
    586  static MOZ_ALWAYS_INLINE void checkStringCharsArena(const CharT* chars,
    587                                                      bool usesStringBuffer) {
    588 #ifdef MOZ_DEBUG
    589    // Check that the new buffer is located in the StringBufferArena.
    590    // For now ignore this for StringBuffers because they can be allocated in
    591    // the main jemalloc arena.
    592    if (!usesStringBuffer) {
    593      js::AssertJSStringBufferInCorrectArena(chars);
    594    }
    595 #endif
    596  }
    597 
    598  // Get correct non-inline chars union arm for given type
    599  template <typename CharT>
    600  MOZ_ALWAYS_INLINE const CharT* nonInlineCharsRaw() const;
    601 
    602 public:
    603  MOZ_ALWAYS_INLINE
    604  bool empty() const { return length() == 0; }
    605 
         // Fallible reads of the char / code point at `index` into the out-param.
         // NOTE(review): defined elsewhere; failure conditions are not visible here.
    606  inline bool getChar(JSContext* cx, size_t index, char16_t* code);
    607  inline bool getCodePoint(JSContext* cx, size_t index, char32_t* codePoint);
    608 
    609  /* Strings have either Latin1 or TwoByte chars. */
    610  bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
    611  bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); }
    612 
    613  /* Strings might contain cached indexes. */
    614  bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; }
         // Returns the cached index held in the flag bits from INDEX_VALUE_SHIFT
         // upwards. Asserts hasIndexValue() and that the string is linear.
    615  uint32_t getIndexValue() const {
    616    MOZ_ASSERT(hasIndexValue());
    617    MOZ_ASSERT(isLinear());
    618    return flags() >> INDEX_VALUE_SHIFT;
    619  }
    620 
    621  /*
    622   * Whether any dependent strings point to this string's chars. This is needed
    623   * so that we don't replace the string with a forwarded atom and free its
    624   * buffer.
    625   *
    626   * NOTE: we specifically do not set this for atoms, because they are accessed
    627   * on many threads and we don't want to mess with their flags if we don't
    628   * have to, and it is safe because atoms will never be replaced by an atom
    629   * ref.
    630   */
    631  bool isDependedOn() const {
    632    bool result = flags() & DEPENDED_ON_BIT;
           // The bit is only expected on linear non-atom strings.
    633    MOZ_ASSERT_IF(result, !isRope() && !isAtom());
    634    return result;
    635  }
    636 
         // Returns (rather than asserts) whether this string may serve as the base of
         // a dependent string: it is an atom or is marked as depended on.
    637  bool assertIsValidBase() const {
    638    // See isDependedOn comment for why we're excluding atoms
    639    return isAtom() || isDependedOn();
    640  }
    641 
         // Marks this string as depended on. Asserts the string is not a rope, and
         // deliberately leaves atoms' flags untouched.
    642  void setDependedOn() {
    643    MOZ_ASSERT(!isRope());
    644    if (isAtom()) {
    645      return;
    646    }
    647    setFlagBit(DEPENDED_ON_BIT);
    648  }
    649 
    650  inline size_t allocSize() const;
    651 
    652  /* Fallible conversions to more-derived string types. */
    653 
    654  inline JSLinearString* ensureLinear(JSContext* cx);
    655 
    656  /* Type query and debug-checked casts */
    657 
    658  MOZ_ALWAYS_INLINE
    659  bool isRope() const { return (flags() & LINEAR_BIT) == 0; }
    660 
    661  MOZ_ALWAYS_INLINE
    662  JSRope& asRope() const {
    663    MOZ_ASSERT(isRope());  // Debug-only check; the cast itself is unchecked.
    664    return *(JSRope*)this;
    665  }
    666 
    667  MOZ_ALWAYS_INLINE
    668  bool isLinear() const { return (flags() & LINEAR_BIT) != 0; }
    669 
    670  MOZ_ALWAYS_INLINE
    671  JSLinearString& asLinear() const {
    672    MOZ_ASSERT(JSString::isLinear());  // Debug-only check.
    673    return *(JSLinearString*)this;
    674  }
    675 
    676  MOZ_ALWAYS_INLINE
    677  bool isDependent() const { return (flags() & DEPENDENT_BIT) != 0; }
    678 
    679  MOZ_ALWAYS_INLINE
    680  bool isAtomRef() const {
    681    return (flags() & ATOM_REF_BIT) != 0 && !isAtom();
    682  }
    683 
    684  MOZ_ALWAYS_INLINE
    685  JSDependentString& asDependent() const {
    686    MOZ_ASSERT(isDependent());  // Debug-only check; the cast is unchecked.
    687    return *(JSDependentString*)this;
    688  }
    689 
    690  MOZ_ALWAYS_INLINE
    691  bool isExtensible() const {
    692    return EXTENSIBLE_FLAGS == (flags() & TYPE_FLAGS_MASK);
    693  }
    694 
    695  MOZ_ALWAYS_INLINE
    696  JSExtensibleString& asExtensible() const {
    697    MOZ_ASSERT(isExtensible());  // Debug-only check; the cast is unchecked.
    698    return *(JSExtensibleString*)this;
    699  }
    700 
    701  MOZ_ALWAYS_INLINE
    702  bool isInline() const { return (flags() & INLINE_CHARS_BIT) != 0; }
    703 
    704  MOZ_ALWAYS_INLINE
    705  JSInlineString& asInline() const {
    706    MOZ_ASSERT(isInline());  // Debug-only check; the cast is unchecked.
    707    return *(JSInlineString*)this;
    708  }
    709 
    710  MOZ_ALWAYS_INLINE
    711  bool isFatInline() const {
    712    return FAT_INLINE_MASK == (flags() & FAT_INLINE_MASK);
    713  }
    714 
    715  /* Hot code should prefer one of the other type queries. */
    716  bool isExternal() const {
    717    return EXTERNAL_FLAGS == (flags() & TYPE_FLAGS_MASK);
    718  }
    719 
    720  MOZ_ALWAYS_INLINE
    721  JSExternalString& asExternal() const {
    722    MOZ_ASSERT(isExternal());  // Debug-only check; the cast is unchecked.
    723    return *(JSExternalString*)this;
    724  }
    725 
    726  MOZ_ALWAYS_INLINE
    727  bool isAtom() const { return (flags() & ATOM_BIT) != 0; }
    728 
    729  MOZ_ALWAYS_INLINE
    730  bool isPermanentAtom() const {
    731    return PERMANENT_ATOM_MASK == (flags() & PERMANENT_ATOM_MASK);
    732  }
    733 
    734  MOZ_ALWAYS_INLINE
    735  JSAtom& asAtom() const {
    736    MOZ_ASSERT(isAtom());  // Debug-only check; the cast is unchecked.
    737    return *(JSAtom*)this;
    738  }
    739 
    740  MOZ_ALWAYS_INLINE
    741  js::JSOffThreadAtom& asOffThreadAtom() const {
    742    MOZ_ASSERT(headerFlagsFieldAtomic() & ATOM_BIT);  // Atomic flags read.
    743    return *(js::JSOffThreadAtom*)this;
    744  }
    745 
    746  MOZ_ALWAYS_INLINE
    747  void setNonDeduplicatable() {
    748    MOZ_ASSERT(isLinear());
    749    MOZ_ASSERT(!isAtom());
    750    setFlagBit(NON_DEDUP_BIT);
    751  }
    752 
    753  // Once a string has been copied from the nursery to the tenured heap,
    754  // drop the flag bits that no longer apply to it.
    755  MOZ_ALWAYS_INLINE
    756  void clearBitsOnTenure() {
    757    MOZ_ASSERT(!isAtom());
    758    clearFlagBit(IN_STRING_TO_ATOM_CACHE | NON_DEDUP_BIT);
    759  }
    760 
    761  // NON_DEDUP_BIT only has meaning on linear strings that are not atoms.
    762  MOZ_ALWAYS_INLINE
    763  bool isDeduplicatable() const {
    764    MOZ_ASSERT(isLinear());
    765    MOZ_ASSERT(!isAtom());
    766    return (flags() & NON_DEDUP_BIT) == 0;
    767  }
    768 
    769  void setInStringToAtomCache() {
    770    MOZ_ASSERT(!isAtom());  // The cache bit is never set on atoms.
    771    setFlagBit(IN_STRING_TO_ATOM_CACHE);
    772  }
    773  bool inStringToAtomCache() const {
           return (flags() & IN_STRING_TO_ATOM_CACHE) != 0;
         }
    774 
    775  // Fills |array| with various strings that represent the different string
    776  // kinds and character encodings.
    777  static bool fillWithRepresentatives(JSContext* cx,
    778                                      JS::Handle<js::ArrayObject*> array);
    779 
    780  /* Only called by the GC for dependent strings. */
    781 
    782  inline bool hasBase() const { return isDependent(); }  // Same test as isDependent().
    783 
    784  inline JSLinearString* base() const;
    785 
    786  inline JSAtom* atom() const;
    787 
    788  // The base may be forwarded and becomes a relocation overlay.
    789  // The return value can be a relocation overlay when the base is forwarded,
    790  // or the return value can be the actual base when it is not forwarded.
    791  inline JSLinearString* nurseryBaseOrRelocOverlay() const;
    792 
    793  inline bool canOwnDependentChars() const;
    794 
    795  bool tryReplaceWithAtomRef(JSAtom* atom);
    796 
    797  void traceBase(JSTracer* trc);
    798 
    799  /* Only called by the GC for strings with the AllocKind::STRING kind. */
    800 
    801  inline void finalize(JS::GCContext* gcx);
    802 
    803  /* Gets the number of bytes that the chars take on the heap. */
    804 
    805  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
    806 
    807  bool hasOutOfLineChars() const {
    808    return isLinear() && !(isInline() || isDependent() || isExternal());
    809  }
    810 
    811  inline bool ownsMallocedChars() const;
    812 
    813  bool hasStringBuffer() const {
    814    MOZ_ASSERT_IF(flags() & HAS_STRING_BUFFER_BIT,
    815                  isLinear() && !isInline() && !isDependent() && !isExternal());
    816    return (flags() & HAS_STRING_BUFFER_BIT) != 0;
    817  }
    818 
    819  /* Encode as many scalar values of the string as UTF-8 as can fit
    820   * into the caller-provided buffer replacing unpaired surrogates
    821   * with the REPLACEMENT CHARACTER.
    822   *
    823   * Returns the number of code units read and the number of code units
    824   * written.
    825   *
    826   * The semantics of this method match the semantics of
    827   * TextEncoder.encodeInto().
    828   *
    829   * This function doesn't modify the representation -- rope, linear,
    830   * flat, atom, etc. -- of this string. If this string is a rope,
    831   * it also doesn't modify the representation of the left or right halves
    832   * of this string, or of the halves of those halves, and so on.
    833   *
    834   * Returns mozilla::Nothing on OOM.
    835   */
    836  mozilla::Maybe<std::tuple<size_t, size_t>> encodeUTF8Partial(
    837      const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const;
    838 
    839 private:
    840  // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
    841  // to call the method below.
    842  friend class js::jit::MacroAssembler;
    843  static size_t offsetOfNonInlineChars() {
    844    static_assert(
    845        offsetof(JSString, d.s.u2.nonInlineCharsLatin1) ==
    846            offsetof(JSString, d.s.u2.nonInlineCharsTwoByte),
    847        "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
    848    return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);  // Either encoding.
    849  }
    850 
    851 public:
    852  static const JS::TraceKind TraceKind = JS::TraceKind::String;
    853 
    854  JS::Zone* zone() const {
    855    if (!isTenured()) {
    856      return nurseryZone();
    857    }
    858    if (isPermanentAtom()) {
    859      // Allow permanent atoms to be accessed across zones and runtimes.
    860      return zoneFromAnyThread();
    861    }
    862    return asTenured().zone();
    863  }
    864 
    865  void setLengthAndFlags(uint32_t len, uint32_t flags) {
    866    setHeaderLengthAndFlags(len, flags);  // Forwards to the header accessor.
    867  }
    868  void setFlagBit(uint32_t flag) { setHeaderFlagBit(flag); }  // Forwards to the header.
    869  void clearFlagBit(uint32_t flag) { clearHeaderFlagBit(flag); }  // Forwards to the header.
    870 
    871  void fixupAfterMovingGC() {}  // Intentionally empty.
    872 
    873  js::gc::AllocKind getAllocKind() const {
    874    using js::gc::AllocKind;
    875    // Atoms and non-atoms each have a fat-inline kind; non-atom external
    876    // strings have their own kind, and every other non-atom is a STRING.
    877    AllocKind kind;
    878    if (isAtom()) {
    879      kind = isFatInline() ? AllocKind::FAT_INLINE_ATOM : AllocKind::ATOM;
    880    } else if (isFatInline()) {
    881      kind = AllocKind::FAT_INLINE_STRING;
    882    } else if (isExternal()) {
    883      kind = AllocKind::EXTERNAL_STRING;
    884    } else {
    885      kind = AllocKind::STRING;
    886    }
    887    // For a tenured string, cross-check the kind derived from the flags
    888    // against the kind reported by the tenured cell.
    889    MOZ_ASSERT_IF(isTenured(), kind == asTenured().getAllocKind());
    890    return kind;
    891  }
    892 
    893 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
    894  void dump() const;
    895  void dump(js::GenericPrinter& out) const;
    896  void dump(js::JSONPrinter& json) const;
    897 
    898  void dumpCommonFields(js::JSONPrinter& json) const;
    899  void dumpCharsFields(js::JSONPrinter& json) const;
    900 
    901  void dumpFields(js::JSONPrinter& json) const;
    902  void dumpStringContent(js::GenericPrinter& out) const;
    903  void dumpPropertyName(js::GenericPrinter& out) const;
    904 
    905  void dumpChars(js::GenericPrinter& out) const;
    906  void dumpCharsSingleQuote(js::GenericPrinter& out) const;
    907  void dumpCharsNoQuote(js::GenericPrinter& out) const;
    908 
    909  template <typename CharT>
    910  static void dumpCharsNoQuote(const CharT* s, size_t len,
    911                               js::GenericPrinter& out);
    912 
    913  void dumpRepresentation() const;
    914  void dumpRepresentation(js::GenericPrinter& out) const;
    915  void dumpRepresentation(js::JSONPrinter& json) const;
    916  void dumpRepresentationFields(js::JSONPrinter& json) const;
    917 
    918  bool equals(const char* s);
    919 #endif
    920 
    921  void traceChildren(JSTracer* trc);
    922 
    923  // Replaces the base class implementation so the GC hears about permanent atoms.
    924  bool isPermanentAndMayBeShared() const { return isPermanentAtom(); }
    925 
    926  static void addCellAddressToStoreBuffer(js::gc::StoreBuffer* buffer,
    927                                          js::gc::Cell** cellp) {
    928    auto** stringp = reinterpret_cast<JSString**>(cellp);
           buffer->putCell(stringp);
    929  }
    930 
    931  static void removeCellAddressFromStoreBuffer(js::gc::StoreBuffer* buffer,
    932                                               js::gc::Cell** cellp) {
    933    auto** stringp = reinterpret_cast<JSString**>(cellp);
           buffer->unputCell(stringp);
    934  }
    935 
    936 private:
    937  JSString(const JSString& other) = delete;  // Not copy-constructible.
    938  void operator=(const JSString& other) = delete;  // Not copy-assignable.
    939 
    940 protected:
    941  JSString() = default;  // Only reachable from within JSString and derived classes.
    942 };
    943 
    944 namespace js {
    945 
    946 template <typename Wrapper, typename CharT>
    947 class WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
         // Reaches the wrapped OwnedChars by casting |this| to the Wrapper type.
    948  const JSString::OwnedChars<CharT>& get() const {
    949    return static_cast<const Wrapper*>(this)->get();
    950  }
    951 
    952 public:
         // Read-only operations; each one forwards to the wrapped OwnedChars.
    953  explicit operator bool() const { return !!get(); }
    954  mozilla::Span<CharT> span() const { return get().span(); }
    955  CharT* data() const { return get().data(); }
    956  size_t length() const { return get().length(); }
    957  size_t size() const { return get().size(); }
    958  bool isMalloced() const { return get().isMalloced(); }
    959  bool hasStringBuffer() const { return get().hasStringBuffer(); }
    960 };
    961 
    962 template <typename Wrapper, typename CharT>
    963 class MutableWrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper>
    964    : public WrappedPtrOperations<JSString::OwnedChars<CharT>, Wrapper> {
         // Mutable counterpart of the base class accessor: casts |this| to Wrapper.
    965  JSString::OwnedChars<CharT>& get() {
    966    return static_cast<Wrapper*>(this)->get();
    967  }
    968 
    969 public:
         // Mutating operations; each one forwards to the wrapped OwnedChars.
    970  CharT* release() { return get().release(); }
    971  void reset() { get().reset(); }
    972  void ensureNonNursery() { get().ensureNonNursery(); }
    973 };
    974 
    975 } /* namespace js */
    976 
    977 class JSRope : public JSString {
    978  friend class js::gc::CellAllocator;
    979 
    980  template <typename CharT>
    981  js::UniquePtr<CharT[], JS::FreePolicy> copyCharsInternal(
    982      JSContext* cx, arena_id_t destArenaId) const;
    983 
    984  enum UsingBarrier : bool { NoBarrier = false, WithIncrementalBarrier = true };
    985 
    986  friend class JSString;
    987  JSLinearString* flatten(JSContext* maybecx);
    988 
    989  JSLinearString* flattenInternal();
    990  template <UsingBarrier usingBarrier>
    991  JSLinearString* flattenInternal();
    992 
    993  template <UsingBarrier usingBarrier, typename CharT>
    994  static JSLinearString* flattenInternal(JSRope* root);
    995 
    996  template <UsingBarrier usingBarrier>
    997  static void ropeBarrierDuringFlattening(JSRope* rope);
    998 
    999  JSRope(JSString* left, JSString* right, size_t length);
   1000 
   1001 public:
   1002  template <js::AllowGC allowGC>
   1003  static inline JSRope* new_(
   1004      JSContext* cx,
   1005      typename js::MaybeRooted<JSString*, allowGC>::HandleType left,
   1006      typename js::MaybeRooted<JSString*, allowGC>::HandleType right,
   1007      size_t length, js::gc::Heap = js::gc::Heap::Default);
   1008 
   1009  js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> copyLatin1Chars(
   1010      JSContext* maybecx, arena_id_t destArenaId) const;
   1011  JS::UniqueTwoByteChars copyTwoByteChars(JSContext* maybecx,
   1012                                          arena_id_t destArenaId) const;
   1013 
   1014  template <typename CharT>
   1015  js::UniquePtr<CharT[], JS::FreePolicy> copyChars(
   1016      JSContext* maybecx, arena_id_t destArenaId) const;
   1017 
   1018  // Hash function specific for ropes that avoids allocating a temporary
   1019  // string. There are still allocations internally so it's technically
   1020  // fallible.
   1021  //
   1022  // Returns the same value as if this were a linear string being hashed.
   1023  [[nodiscard]] bool hash(uint32_t* outHash) const;
   1024 
   1025  // The process of flattening a rope temporarily overwrites the left pointer of
   1026  // interior nodes in the rope DAG with the parent pointer.
   1027  bool isBeingFlattened() const { return flags() & FLATTEN_MASK; }
   1028 
   1029  JSString* leftChild() const {
   1030    MOZ_ASSERT(isRope());
   1031    MOZ_ASSERT(!isBeingFlattened());  // Flattening overwrites this field.
   1032    return d.s.u2.left;
   1033  }
   1034 
   1035  JSString* rightChild() const {
   1036    MOZ_ASSERT(isRope());  // Unlike leftChild(), no flattening check here.
   1037    return d.s.u3.right;
   1038  }
   1039 
   1040  void traceChildren(JSTracer* trc);
   1041 
   1042 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1043  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1044 #endif
   1045 
   1046 private:
   1047  // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
   1048  // to call the methods below.
   1049  friend class js::jit::MacroAssembler;
   1050 
   1051  static size_t offsetOfLeft() { return offsetof(JSRope, d.s.u2.left); }
   1052  static size_t offsetOfRight() { return offsetof(JSRope, d.s.u3.right); }
   1053 };
   1054 
   1055 static_assert(sizeof(JSRope) == sizeof(JSString),
   1056              "string subclasses must be binary-compatible with JSString");
   1057 
   1058 /*
   1059 * There are optimized entry points for some string allocation functions.
   1060 *
   1061 * The meaning of suffix:
   1062 *   * "MaybeDeflate": for char16_t variant, characters can fit Latin1
   1063 *   * "DontDeflate": for char16_t variant, characters don't fit Latin1
   1064 *   * "NonStatic": characters don't match StaticStrings
   1065 *   * "ValidLength": length fits JSString::MAX_LENGTH
   1066 */
   1067 
   1068 class JSLinearString : public JSString {
   1069  friend class JSString;
   1070  friend class JS::AutoStableStringChars;
   1071  friend class js::gc::TenuringTracer;
   1072  friend class js::gc::CellAllocator;
   1073  friend class JSDependentString;  // To allow access when used as base.
   1074 
   1075  /* Vacuous and therefore unimplemented. */
   1076  JSLinearString* ensureLinear(JSContext* cx) = delete;
   1077  bool isLinear() const = delete;
   1078  JSLinearString& asLinear() const = delete;
   1079 
   1080  JSLinearString(const char16_t* chars, size_t length, bool hasBuffer);
   1081  JSLinearString(const JS::Latin1Char* chars, size_t length, bool hasBuffer);
   1082  template <typename CharT>
   1083  explicit inline JSLinearString(JS::MutableHandle<OwnedChars<CharT>> chars);
   1084 
   1085 protected:
   1086  // Used to construct subclasses that do a full initialization themselves.
   1087  JSLinearString() = default;
   1088 
   1089  /* Returns void pointer to latin1/twoByte chars, for finalizers. */
   1090  MOZ_ALWAYS_INLINE
   1091  void* nonInlineCharsRaw() const {
   1092    MOZ_ASSERT(!isInline());
   1093    static_assert(
   1094        offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) ==
   1095            offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1),
   1096        "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
   1097    return (void*)d.s.u2.nonInlineCharsTwoByte;
   1098  }
   1099 
   1100  MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const;
   1101  MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const;
   1102 
   1103 public:
   1104  template <js::AllowGC allowGC, typename CharT>
   1105  static inline JSLinearString* new_(JSContext* cx,
   1106                                     JS::MutableHandle<OwnedChars<CharT>> chars,
   1107                                     js::gc::Heap heap);
   1108 
   1109  template <js::AllowGC allowGC, typename CharT>
   1110  static inline JSLinearString* newValidLength(
   1111      JSContext* cx, JS::MutableHandle<OwnedChars<CharT>> chars,
   1112      js::gc::Heap heap);
   1113 
   1114  // Convert a plain linear string to an extensible string. For testing. The
   1115  // caller must ensure that it is a plain or extensible string already, and
   1116  // that `capacity` is adequate.
   1117  JSExtensibleString& makeExtensible(size_t capacity);
   1118 
   1119  template <typename CharT>
   1120  MOZ_ALWAYS_INLINE const CharT* nonInlineChars(
   1121      const JS::AutoRequireNoGC& nogc) const;
   1122 
   1123  MOZ_ALWAYS_INLINE
   1124  const JS::Latin1Char* nonInlineLatin1Chars(
   1125      const JS::AutoRequireNoGC& nogc) const {
   1126    MOZ_ASSERT(!isInline());
   1127    MOZ_ASSERT(hasLatin1Chars());
   1128    return d.s.u2.nonInlineCharsLatin1;
   1129  }
   1130 
   1131  MOZ_ALWAYS_INLINE
   1132  const char16_t* nonInlineTwoByteChars(const JS::AutoRequireNoGC& nogc) const {
   1133    MOZ_ASSERT(!isInline());
   1134    MOZ_ASSERT(hasTwoByteChars());
   1135    return d.s.u2.nonInlineCharsTwoByte;
   1136  }
   1137 
   1138  template <typename CharT>
   1139  MOZ_ALWAYS_INLINE const CharT* chars(const JS::AutoRequireNoGC& nogc) const;
   1140 
   1141  MOZ_ALWAYS_INLINE
   1142  const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
   1143    return rawLatin1Chars();
   1144  }
   1145 
   1146  MOZ_ALWAYS_INLINE
   1147  const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
   1148    return rawTwoByteChars();
   1149  }
   1150 
   1151  mozilla::Range<const JS::Latin1Char> latin1Range(
   1152      const JS::AutoRequireNoGC& nogc) const {
   1153    MOZ_ASSERT(JSString::isLinear());
   1154    return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
   1155  }
   1156 
   1157  mozilla::Range<const char16_t> twoByteRange(
   1158      const JS::AutoRequireNoGC& nogc) const {
   1159    MOZ_ASSERT(JSString::isLinear());
   1160    return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
   1161  }
   1162 
   1163  template <typename CharT>
   1164  mozilla::Range<const CharT> range(const JS::AutoRequireNoGC& nogc) const {
   1165    if constexpr (std::is_same_v<CharT, JS::Latin1Char>) {
   1166      return latin1Range(nogc);
   1167    } else {
   1168      return twoByteRange(nogc);
   1169    }
   1170  }
   1171 
   1172  MOZ_ALWAYS_INLINE
   1173  char16_t latin1OrTwoByteChar(size_t index) const {
   1174    MOZ_ASSERT(JSString::isLinear());
   1175    MOZ_ASSERT(index < length());
   1176    JS::AutoCheckCannotGC nogc;
   1177    return hasLatin1Chars() ? latin1Chars(nogc)[index]
   1178                            : twoByteChars(nogc)[index];
   1179  }
   1180 
   1181  bool isIndexSlow(uint32_t* indexp) const {
   1182    MOZ_ASSERT(JSString::isLinear());
   1183    size_t len = length();
           // Reject empty strings and strings longer than
           // js::UINT32_CHAR_BUFFER_LENGTH up front.
   1184    if (len == 0 || len > js::UINT32_CHAR_BUFFER_LENGTH) {
   1185      return false;
   1186    }
   1187    JS::AutoCheckCannotGC nogc;
           // The first char must be an ASCII digit before the full check is run.
   1188    if (hasLatin1Chars()) {
   1189      const JS::Latin1Char* s = latin1Chars(nogc);
   1190      return mozilla::IsAsciiDigit(*s) &&
   1191             js::CheckStringIsIndex(s, len, indexp);
   1192    }
   1193    const char16_t* s = twoByteChars(nogc);
   1194    return mozilla::IsAsciiDigit(*s) && js::CheckStringIsIndex(s, len, indexp);
   1195  }
   1196 
   1197  // Returns true if this string's characters store an unsigned 32-bit integer
   1198  // value less than or equal to MAX_ARRAY_INDEX, initializing *indexp to that
   1199  // value if so. Leading '0' isn't allowed except 0 itself.
   1200  // (Thus if calling isIndex returns true, js::IndexToString(cx, *indexp) will
   1201  // be a string equal to this string.)
   1202  inline bool isIndex(uint32_t* indexp) const;
   1203 
   1204  // Return whether the characters of this string can be moved by minor or
   1205  // compacting GC.
   1206  inline bool hasMovableChars() const;
   1207 
   1208  bool hasCharsInCollectedNurseryRegion() const;
   1209 
   1210  void maybeInitializeIndexValue(uint32_t index, bool allowAtom = false) {
   1211    MOZ_ASSERT(JSString::isLinear());
   1212    MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index);
   1213    MOZ_ASSERT_IF(!allowAtom, !isAtom());
   1214 
           // Nothing to do if an index is already cached; indexes above UINT16_MAX
           // are never cached.
   1215    if (hasIndexValue() || index > UINT16_MAX) {
   1216      return;
   1217    }
   1218 
   1219    mozilla::DebugOnly<uint32_t> containedIndex;
   1220    MOZ_ASSERT(isIndexSlow(&containedIndex));
   1221    MOZ_ASSERT(index == containedIndex);
   1222 
           // Store the index in the flags word together with the marker bit.
   1223    setFlagBit((index << INDEX_VALUE_SHIFT) | INDEX_VALUE_BIT);
   1224    MOZ_ASSERT(getIndexValue() == index);
   1225  }
   1226 
   1227  mozilla::StringBuffer* stringBuffer() const {
   1228    MOZ_ASSERT(hasStringBuffer());
   1229    auto* chars = nonInlineCharsRaw();
   1230    return mozilla::StringBuffer::FromData(const_cast<void*>(chars));
   1231  }
   1232 
   1233  /*
   1234   * Returns a property name represented by this string, or null on failure.
   1235   * You must verify that this is not an index per isIndex before calling
   1236   * this method.
   1237   */
   1238  inline js::PropertyName* toPropertyName(JSContext* cx);
   1239 
   1240  // Make sure chars are not in the nursery, mallocing and copying if necessary.
   1241  // Should only be called during minor GC on a string that has been promoted
   1242  // to the tenured heap and may still point to nursery-allocated chars.
   1243  template <typename CharT>
   1244  inline size_t maybeMallocCharsOnPromotion(js::Nursery* nursery);
   1245 
   1246  // Handle an edge case where a dependent chain N1 -> T2 -> N3 cannot handle N3
   1247  // moving its chars (or more specifically, updating N1 to the new chars.) When
   1248  // this is detected, convert N1 to a regular string with its own storage.
   1249  //
   1250  // The CharT-agnostic entry point is maybeCloneCharsOnPromotion() below.
   1251  template <typename CharT>
   1252  static void maybeCloneCharsOnPromotionTyped(JSLinearString* str);
   1253 
   1254  static void maybeCloneCharsOnPromotion(JSLinearString* str) {
   1255    if (str->hasLatin1Chars()) {
   1256      maybeCloneCharsOnPromotionTyped<JS::Latin1Char>(str);
   1257    } else {
   1258      maybeCloneCharsOnPromotionTyped<char16_t>(str);
   1259    }
   1260  }
   1261 
   1262  inline void finalize(JS::GCContext* gcx);
   1263  inline size_t allocSize() const;
   1264 
   1265 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1266  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1267 #endif
   1268 
   1269  // Make a partially-initialized string safe for finalization.
   1270  inline void disownCharsBecauseError();
   1271 };
   1272 
   1273 static_assert(sizeof(JSLinearString) == sizeof(JSString),
   1274              "string subclasses must be binary-compatible with JSString");
   1275 
   1276 namespace JS {
        // Used as the template argument of JSDependentString::newImpl_.
   1277 enum class ContractBaseChain : bool { AllowLong = false, Contract = true };
   1278 }
   1279 
   1280 class JSDependentString : public JSLinearString {
   1281  friend class JSString;
   1282  friend class js::gc::CellAllocator;
   1283 
   1284  JSDependentString(JSLinearString* base, size_t start, size_t length);
   1285 
   1286  // Default construction exists for JIT string allocation.
   1287  JSDependentString() = default;
   1288 
   1289  /* Vacuous on this type, hence deleted rather than implemented. */
   1290  bool isDependent() const = delete;
   1291  JSDependentString& asDependent() const = delete;
   1292 
   1293  /* How far into base->chars() this string's chars begin. */
   1294  MOZ_ALWAYS_INLINE size_t baseOffset() const {
   1295    MOZ_ASSERT(JSString::isDependent());
   1296    JS::AutoCheckCannotGC nogc;
   1297    // Pointer difference between our chars and the start of the base's chars,
   1298    // taken in whichever encoding this string uses.
   1299    const size_t offset =
   1300        hasTwoByteChars()
   1301            ? twoByteChars(nogc) - base()->twoByteChars(nogc)
   1302            : latin1Chars(nogc) - base()->latin1Chars(nogc);
   1303    MOZ_ASSERT(offset < base()->length());
   1304    return offset;
   1305  }
   1306 
   1307 public:
   1308  template <JS::ContractBaseChain contract>
   1309  static inline JSLinearString* newImpl_(JSContext* cx, JSLinearString* base,
   1310                                         size_t start, size_t length,
   1311                                         js::gc::Heap heap);
   1312 
   1313  // Always produces a dependent string; asserts if the chars would have fit
   1314  // into an inline string instead.
   1315  static inline JSLinearString* new_(JSContext* cx, JSLinearString* base,
   1316                                     size_t start, size_t length,
   1317                                     js::gc::Heap heap);
   1318 
   1319  // Only called by the GC during nursery collection.
   1320  void setBase(JSLinearString* newBase);
   1321 
   1322  template <typename T>
   1323  void relocateBaseAndChars(JSLinearString* base, T chars, size_t offset) {
   1324    MOZ_ASSERT(base->assertIsValidBase());
   1325    // Pass along whether the new base uses a string buffer.
   1326    setNonInlineChars(chars + offset, base->hasStringBuffer());
   1327    setBase(base);
   1328  }
   1329 
   1330  JSLinearString* rootBaseDuringMinorGC();
   1331 
   1332  template <typename CharT>
   1333  inline void updateToPromotedBaseImpl(JSLinearString* base);
   1334 
   1335  inline void updateToPromotedBase(JSLinearString* base);
   1336 
   1337  // A dependent string is not worth creating when it uses no more than 6.25%
   1338  // (1/16) of its base, since a tiny dependent string would otherwise keep a
   1339  // large base string alive. (The percentage is a somewhat arbitrary threshold
   1340  // chosen because it is easy to compute.)
   1341  //
   1342  // Note that currently this limit only applies during tenuring; in the
   1343  // nursery, small dependent strings will be created but then cloned into
   1344  // unshared strings during tenuring. (The base string will not be marked in
   1345  // this case.)
   1346  static bool smallComparedToBase(size_t sharedChars, size_t baseChars) {
   1347    return sharedChars <= baseChars / 16;
   1348  }
   1349 
   1350 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1351  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1352 #endif
   1353 
   1354 private:
   1355  // MacroAssembler is the only permitted caller of the method below; the
   1356  // restriction helps avoid writing Spectre-unsafe code.
   1357  friend class js::jit::MacroAssembler;
   1358 
   1359  inline static size_t offsetOfBase() {
   1360    return offsetof(JSDependentString, d.s.u3.base);
   1361  }
   1362 };
   1363 
   1364 static_assert(sizeof(JSDependentString) == sizeof(JSString),
   1365              "string subclasses must be binary-compatible with JSString");
   1366 
   1367 class JSAtomRefString : public JSDependentString {
   1368  friend class JSString;
   1369  friend class js::gc::CellAllocator;
   1370  friend class js::jit::MacroAssembler;
   1371 
   1372 public:
         // Byte offset of the d.s.u3.atom field within the string.
   1373  inline static size_t offsetOfAtom() {
   1374    return offsetof(JSAtomRefString, d.s.u3.atom);
   1375  }
   1376 };
   1377 
   1378 static_assert(sizeof(JSAtomRefString) == sizeof(JSString),
   1379              "string subclasses must be binary-compatible with JSString");
   1380 
   1381 class JSExtensibleString : public JSLinearString {
   1382  /* Vacuous and therefore unimplemented. */
   1383  bool isExtensible() const = delete;
   1384  JSExtensibleString& asExtensible() const = delete;
   1385 
   1386 public:
         // Reads the capacity stored in d.s.u3.capacity.
   1387  MOZ_ALWAYS_INLINE
   1388  size_t capacity() const {
   1389    MOZ_ASSERT(JSString::isExtensible());
   1390    return d.s.u3.capacity;
   1391  }
   1392 
   1393 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1394  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1395 #endif
   1396 };
   1397 
   1398 static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
   1399              "string subclasses must be binary-compatible with JSString");
   1400 
   1401 class JSInlineString : public JSLinearString {
   1402 public:
         // Pointer to the inline Latin1 storage; asserts the string is inline Latin1.
   1403  MOZ_ALWAYS_INLINE
   1404  const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
   1405    MOZ_ASSERT(JSString::isInline());
   1406    MOZ_ASSERT(hasLatin1Chars());
   1407    return d.inlineStorageLatin1;
   1408  }
   1409 
         // Pointer to the inline TwoByte storage; asserts the string is inline TwoByte.
   1410  MOZ_ALWAYS_INLINE
   1411  const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
   1412    MOZ_ASSERT(JSString::isInline());
   1413    MOZ_ASSERT(hasTwoByteChars());
   1414    return d.inlineStorageTwoByte;
   1415  }
   1416 
   1417  template <typename CharT>
   1418  static bool lengthFits(size_t length);
   1419 
   1420 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1421  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1422 #endif
   1423 
   1424 private:
   1425  // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
   1426  // to call the method below.
   1427  friend class js::jit::MacroAssembler;
   1428  static size_t offsetOfInlineStorage() {
   1429    return offsetof(JSInlineString, d.inlineStorageTwoByte);
   1430  }
   1431 };
   1432 
   1433 static_assert(sizeof(JSInlineString) == sizeof(JSString),
   1434              "string subclasses must be binary-compatible with JSString");
   1435 
   1436 /*
   1437 * On 32-bit platforms, JSThinInlineString can store 8 Latin1 characters or 4
   1438 * TwoByte characters inline. On 64-bit platforms, these numbers are 16 and 8,
   1439 * respectively.
   1440 */
   1441 class JSThinInlineString : public JSInlineString {
   1442  friend class js::gc::CellAllocator;
   1443 
   1444  // The constructors return a mutable pointer to the data, because the first
   1445  // thing any creator will do is copy in the string value. This also
   1446  // conveniently allows doing overload resolution on CharT.
   1447  explicit JSThinInlineString(size_t length, JS::Latin1Char** chars);
   1448  explicit JSThinInlineString(size_t length, char16_t** chars);
   1449 
   1450  // For JIT string allocation.
   1451  JSThinInlineString() = default;
   1452 
   1453 public:
   1454  static constexpr size_t InlineBytes = NUM_INLINE_CHARS_LATIN1;
   1455 
         // Maximum lengths storable inline, per character encoding.
   1456  static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1;
   1457  static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE;
   1458 
   1459  template <js::AllowGC allowGC>
   1460  static inline JSThinInlineString* new_(JSContext* cx, js::gc::Heap heap);
   1461 
   1462  template <typename CharT>
   1463  static bool lengthFits(size_t length);
   1464 };
   1465 
   1466 static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
   1467              "string subclasses must be binary-compatible with JSString");
   1468 
   1469 /*
   1470 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 12 and
   1471 * MAX_LENGTH_LATIN1 is 24. This is deliberate, in order to minimize potential
   1472 * performance differences between 32-bit and 64-bit platforms.
   1473 *
   1474 * There are still some differences due to NUM_INLINE_CHARS_* being different.
   1475 * E.g. TwoByte strings of length 5--8 will be JSFatInlineStrings on 32-bit
   1476 * platforms and JSThinInlineStrings on 64-bit platforms. But the more
   1477 * significant transition from inline strings to non-inline strings occurs at
   1478 * length 12 (for TwoByte strings) and 24 (Latin1 strings) on both 32-bit and
   1479 * 64-bit platforms.
   1480 */
   1481 class JSFatInlineString : public JSInlineString {
   1482  friend class js::gc::CellAllocator;
   1483 
   1484  static const size_t INLINE_EXTENSION_CHARS_LATIN1 =
   1485      24 - NUM_INLINE_CHARS_LATIN1;
   1486  static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE =
   1487      12 - NUM_INLINE_CHARS_TWO_BYTE;
   1488 
   1489  // The constructors return a mutable pointer to the data, because the first
   1490  // thing any creator will do is copy in the string value. This also
   1491  // conveniently allows doing overload resolution on CharT.
   1492  explicit JSFatInlineString(size_t length, JS::Latin1Char** chars);
   1493  explicit JSFatInlineString(size_t length, char16_t** chars);
   1494 
   1495  // For JIT string allocation.
   1496  JSFatInlineString() = default;
   1497 
   1498 protected: /* to fool clang into not warning this is unused */
   1499  union {
   1500    char inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
   1501    char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
   1502  };
   1503 
   1504 public:
   1505  template <js::AllowGC allowGC>
   1506  static inline JSFatInlineString* new_(JSContext* cx, js::gc::Heap heap);
   1507 
   1508  static const size_t MAX_LENGTH_LATIN1 =
   1509      JSString::NUM_INLINE_CHARS_LATIN1 + INLINE_EXTENSION_CHARS_LATIN1;
   1510 
   1511  static const size_t MAX_LENGTH_TWO_BYTE =
   1512      JSString::NUM_INLINE_CHARS_TWO_BYTE + INLINE_EXTENSION_CHARS_TWO_BYTE;
   1513 
   1514  template <typename CharT>
   1515  static bool lengthFits(size_t length);
   1516 
   1517  // Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING
   1518  // kind.
   1519  MOZ_ALWAYS_INLINE void finalize(JS::GCContext* gcx);
   1520 };
   1521 
   1522 static_assert(sizeof(JSFatInlineString) % js::gc::CellAlignBytes == 0,
   1523              "fat inline strings shouldn't waste space up to the next cell "
   1524              "boundary");
   1525 
   1526 class JSExternalString : public JSLinearString {
   1527  friend class js::gc::CellAllocator;
   1528 
   1529  JSExternalString(const JS::Latin1Char* chars, size_t length,
   1530                   const JSExternalStringCallbacks* callbacks);
   1531  JSExternalString(const char16_t* chars, size_t length,
   1532                   const JSExternalStringCallbacks* callbacks);
   1533 
   1534  /* Vacuous and therefore unimplemented. */
   1535  bool isExternal() const = delete;
   1536  JSExternalString& asExternal() const = delete;
   1537 
   1538  template <typename CharT>
   1539  static inline JSExternalString* newImpl(
   1540      JSContext* cx, const CharT* chars, size_t length,
   1541      const JSExternalStringCallbacks* callbacks);
   1542 
   1543 public:
   1544  static inline JSExternalString* new_(
   1545      JSContext* cx, const JS::Latin1Char* chars, size_t length,
   1546      const JSExternalStringCallbacks* callbacks);
   1547  static inline JSExternalString* new_(
   1548      JSContext* cx, const char16_t* chars, size_t length,
   1549      const JSExternalStringCallbacks* callbacks);
   1550 
   1551  const JSExternalStringCallbacks* callbacks() const {
   1552    MOZ_ASSERT(JSString::isExternal());
   1553    return d.s.u3.externalCallbacks;
   1554  }
   1555 
   1556  // External chars are never allocated inline or in the nursery, so we can
   1557  // safely expose this without requiring an AutoCheckCannotGC argument.
   1558  const JS::Latin1Char* latin1Chars() const { return rawLatin1Chars(); }
   1559  const char16_t* twoByteChars() const { return rawTwoByteChars(); }
   1560 
   1561  // Only called by the GC for strings with the AllocKind::EXTERNAL_STRING
   1562  // kind.
   1563  inline void finalize(JS::GCContext* gcx);
   1564 
   1565 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1566  void dumpOwnRepresentationFields(js::JSONPrinter& json) const;
   1567 #endif
   1568 };
   1569 
   1570 static_assert(sizeof(JSExternalString) == sizeof(JSString),
   1571              "string subclasses must be binary-compatible with JSString");
   1572 
   1573 class JSAtom : public JSLinearString {
   1574  /* Vacuous and therefore unimplemented. */
   1575  bool isAtom() const = delete;
   1576  JSAtom& asAtom() const = delete;
   1577 
   1578 public:
   1579  template <typename CharT>
   1580  static inline JSAtom* newValidLength(JSContext* cx, OwnedChars<CharT>& chars,
   1581                                       js::HashNumber hash);
   1582 
   1583  /* Returns the PropertyName for this.  isIndex() must be false. */
   1584  inline js::PropertyName* asPropertyName();
   1585 
   1586  MOZ_ALWAYS_INLINE
   1587  bool isPermanent() const { return JSString::isPermanentAtom(); }
   1588 
   1589  MOZ_ALWAYS_INLINE
   1590  void makePermanent() {
   1591    MOZ_ASSERT(JSString::isAtom());
   1592    setFlagBit(PERMANENT_ATOM_MASK);
   1593  }
   1594 
   1595  MOZ_ALWAYS_INLINE bool isIndex() const {
   1596    MOZ_ASSERT(JSString::isAtom());
   1597    mozilla::DebugOnly<uint32_t> index;
   1598    MOZ_ASSERT(!!(flags() & ATOM_IS_INDEX_BIT) == isIndexSlow(&index));
   1599    return flags() & ATOM_IS_INDEX_BIT;
   1600  }
   1601  MOZ_ALWAYS_INLINE bool isIndex(uint32_t* index) const {
   1602    MOZ_ASSERT(JSString::isAtom());
   1603    if (!isIndex()) {
   1604      return false;
   1605    }
   1606    *index = hasIndexValue() ? getIndexValue() : getIndexSlow();
   1607    return true;
   1608  }
   1609 
   1610  uint32_t getIndexSlow() const;
   1611 
   1612  void setIsIndex(uint32_t index) {
   1613    MOZ_ASSERT(JSString::isAtom());
   1614    setFlagBit(ATOM_IS_INDEX_BIT);
   1615    maybeInitializeIndexValue(index, /* allowAtom = */ true);
   1616  }
   1617 
   1618  MOZ_ALWAYS_INLINE bool isPinned() const { return flags() & PINNED_ATOM_BIT; }
   1619 
   1620  void setPinned() {
   1621    MOZ_ASSERT(!isPinned());
   1622    setFlagBit(PINNED_ATOM_BIT);
   1623  }
   1624 
   1625  inline js::HashNumber hash() const;
   1626 
   1627  template <typename CharT>
   1628  static bool lengthFitsInline(size_t length);
   1629 
   1630 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
   1631  void dump(js::GenericPrinter& out);
   1632  void dump();
   1633 #endif
   1634 };
   1635 
   1636 namespace js {
   1637 
   1638 class NormalAtom : public JSAtom {
   1639  friend class gc::CellAllocator;
   1640 
   1641 protected:
   1642  static constexpr size_t ExtensionBytes =
   1643      js::gc::CellAlignBytes - sizeof(js::HashNumber);
   1644 
   1645  char inlineStorage_[ExtensionBytes];
   1646  HashNumber hash_;
   1647 
   1648  // For subclasses to call.
   1649  explicit NormalAtom(js::HashNumber hash) : hash_(hash) {}
   1650 
   1651  // Out of line atoms, mimicking JSLinearString constructor.
   1652  template <typename CharT>
   1653  NormalAtom(const OwnedChars<CharT>& chars, js::HashNumber hash);
   1654 
   1655 public:
   1656  HashNumber hash() const { return hash_; }
   1657 
   1658  static constexpr size_t offsetOfHash() { return offsetof(NormalAtom, hash_); }
   1659 };
   1660 
   1661 static_assert(sizeof(NormalAtom) ==
   1662                  js::RoundUp(sizeof(JSString) + sizeof(js::HashNumber),
   1663                              js::gc::CellAlignBytes),
   1664              "NormalAtom must have size of a string + HashNumber, "
   1665              "aligned to gc::CellAlignBytes");
   1666 
   1667 class ThinInlineAtom : public NormalAtom {
   1668  friend class gc::CellAllocator;
   1669 
   1670 public:
   1671  static constexpr size_t MAX_LENGTH_LATIN1 =
   1672      NUM_INLINE_CHARS_LATIN1 + ExtensionBytes / sizeof(JS::Latin1Char);
   1673  static constexpr size_t MAX_LENGTH_TWO_BYTE =
   1674      NUM_INLINE_CHARS_TWO_BYTE + ExtensionBytes / sizeof(char16_t);
   1675 
   1676 #ifdef JS_64BIT
   1677  // Fat and Thin inline atoms are the same size. Only use fat.
   1678  static constexpr bool EverInstantiated = false;
   1679 #else
   1680  static constexpr bool EverInstantiated = true;
   1681 #endif
   1682 
   1683 protected:
   1684  // Mimicking JSThinInlineString constructors.
   1685 #ifdef JS_64BIT
   1686  ThinInlineAtom(size_t length, JS::Latin1Char** chars,
   1687                 js::HashNumber hash) = delete;
   1688  ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash) = delete;
   1689 #else
   1690  ThinInlineAtom(size_t length, JS::Latin1Char** chars, js::HashNumber hash);
   1691  ThinInlineAtom(size_t length, char16_t** chars, js::HashNumber hash);
   1692 #endif
   1693 
   1694 public:
   1695  template <typename CharT>
   1696  static bool lengthFits(size_t length) {
   1697    if constexpr (sizeof(CharT) == sizeof(JS::Latin1Char)) {
   1698      return length <= MAX_LENGTH_LATIN1;
   1699    } else {
   1700      return length <= MAX_LENGTH_TWO_BYTE;
   1701    }
   1702  }
   1703 };
   1704 
   1705 // FatInlineAtom is basically a JSFatInlineString, except it has a hash value in
   1706 // the last word that reduces the inline char storage.
   1707 class FatInlineAtom : public JSAtom {
   1708  friend class gc::CellAllocator;
   1709 
   1710  // The space available for storing inline characters. It's the same amount of
   1711  // space as a JSFatInlineString, except we take the hash value out of it.
   1712  static constexpr size_t InlineBytes = sizeof(JSFatInlineString) -
   1713                                        sizeof(JSString::Base) -
   1714                                        sizeof(js::HashNumber);
   1715 
   1716  static constexpr size_t ExtensionBytes =
   1717      InlineBytes - JSThinInlineString::InlineBytes;
   1718 
   1719 public:
   1720  static constexpr size_t MAX_LENGTH_LATIN1 =
   1721      InlineBytes / sizeof(JS::Latin1Char);
   1722  static constexpr size_t MAX_LENGTH_TWO_BYTE = InlineBytes / sizeof(char16_t);
   1723 
   1724 protected:  // Silence Clang unused-field warning.
   1725  char inlineStorage_[ExtensionBytes];
   1726  HashNumber hash_;
   1727 
   1728  // Mimicking JSFatInlineString constructors.
   1729  explicit FatInlineAtom(size_t length, JS::Latin1Char** chars,
   1730                         js::HashNumber hash);
   1731  explicit FatInlineAtom(size_t length, char16_t** chars, js::HashNumber hash);
   1732 
   1733 public:
   1734  HashNumber hash() const { return hash_; }
   1735 
   1736  inline void finalize(JS::GCContext* gcx);
   1737 
   1738  static constexpr size_t offsetOfHash() {
   1739    static_assert(
   1740        sizeof(FatInlineAtom) ==
   1741            js::RoundUp(sizeof(JSThinInlineString) +
   1742                            FatInlineAtom::ExtensionBytes + sizeof(HashNumber),
   1743                        gc::CellAlignBytes),
   1744        "FatInlineAtom must have size of a thin inline string + "
   1745        "extension bytes if any + HashNumber, "
   1746        "aligned to gc::CellAlignBytes");
   1747 
   1748    return offsetof(FatInlineAtom, hash_);
   1749  }
   1750 
   1751  template <typename CharT>
   1752  static bool lengthFits(size_t length) {
   1753    return length * sizeof(CharT) <= InlineBytes;
   1754  }
   1755 };
   1756 
   1757 static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString),
   1758              "FatInlineAtom must be the same size as a fat inline string");
   1759 
// When an algorithm does not need a string represented as a single linear
// array of characters, this range utility may be used to traverse the string as
// a sequence of linear arrays of characters. This avoids flattening ropes.
   1763 template <size_t Size = 16>
   1764 class StringSegmentRange {
   1765  // If malloc() shows up in any profiles from this vector, we can add a new
   1766  // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
   1767  using StackVector = JS::GCVector<JSString*, Size>;
   1768  Rooted<StackVector> stack;
   1769  Rooted<JSLinearString*> cur;
   1770 
   1771  bool settle(JSString* str) {
   1772    while (str->isRope()) {
   1773      JSRope& rope = str->asRope();
   1774      if (!stack.append(rope.rightChild())) {
   1775        return false;
   1776      }
   1777      str = rope.leftChild();
   1778    }
   1779    cur = &str->asLinear();
   1780    return true;
   1781  }
   1782 
   1783 public:
   1784  explicit StringSegmentRange(JSContext* cx)
   1785      : stack(cx, StackVector(cx)), cur(cx) {}
   1786 
   1787  [[nodiscard]] bool init(JSString* str) {
   1788    MOZ_ASSERT(stack.empty());
   1789    return settle(str);
   1790  }
   1791 
   1792  bool empty() const { return cur == nullptr; }
   1793 
   1794  JSLinearString* front() const {
   1795    MOZ_ASSERT(!cur->isRope());
   1796    return cur;
   1797  }
   1798 
   1799  [[nodiscard]] bool popFront() {
   1800    MOZ_ASSERT(!empty());
   1801    if (stack.empty()) {
   1802      cur = nullptr;
   1803      return true;
   1804    }
   1805    return settle(stack.popCopy());
   1806  }
   1807 };
   1808 
   1809 // This class should be used in code that manipulates strings off-thread (for
   1810 // example, Ion compilation). The key difference is that flags are loaded
   1811 // atomically, preventing data races if flags (especially the pinned atom bit)
   1812 // are mutated on the main thread. We use private inheritance to avoid
   1813 // accidentally exposing anything non-thread-safe.
   1814 class JSOffThreadAtom : private JSAtom {
   1815 public:
   1816  size_t length() const { return headerLengthFieldAtomic(); }
   1817  size_t flags() const { return headerFlagsFieldAtomic(); }
   1818 
   1819  bool empty() const { return length() == 0; }
   1820 
   1821  bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; }
   1822  bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); }
   1823 
   1824  bool isAtom() const { return flags() & ATOM_BIT; }
   1825  bool isInline() const { return flags() & INLINE_CHARS_BIT; }
   1826  bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; }
   1827  bool isIndex() const { return flags() & ATOM_IS_INDEX_BIT; }
   1828  bool isFatInline() const {
   1829    return (flags() & FAT_INLINE_MASK) == FAT_INLINE_MASK;
   1830  }
   1831 
   1832  uint32_t getIndexValue() const {
   1833    MOZ_ASSERT(hasIndexValue());
   1834    return flags() >> INDEX_VALUE_SHIFT;
   1835  }
   1836  bool isIndex(uint32_t* index) const {
   1837    if (!isIndex()) {
   1838      return false;
   1839    }
   1840    *index = hasIndexValue() ? getIndexValue() : getIndexSlow();
   1841    return true;
   1842  }
   1843  uint32_t getIndexSlow() const;
   1844 
   1845  const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
   1846    MOZ_ASSERT(hasLatin1Chars());
   1847    return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
   1848  };
   1849  const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
   1850    MOZ_ASSERT(hasTwoByteChars());
   1851    return JSLinearString::twoByteChars(nogc);
   1852    return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
   1853  }
   1854  mozilla::Range<const JS::Latin1Char> latin1Range(
   1855      const JS::AutoRequireNoGC& nogc) const {
   1856    return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
   1857  }
   1858  mozilla::Range<const char16_t> twoByteRange(
   1859      const JS::AutoRequireNoGC& nogc) const {
   1860    return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
   1861  }
   1862  char16_t latin1OrTwoByteChar(size_t index) const {
   1863    MOZ_ASSERT(index < length());
   1864    JS::AutoCheckCannotGC nogc;
   1865    return hasLatin1Chars() ? latin1Chars(nogc)[index]
   1866                            : twoByteChars(nogc)[index];
   1867  }
   1868 
   1869  inline HashNumber hash() const {
   1870    if (isFatInline()) {
   1871      return reinterpret_cast<const js::FatInlineAtom*>(this)->hash();
   1872    }
   1873    return reinterpret_cast<const js::NormalAtom*>(this)->hash();
   1874  }
   1875 
   1876  JSAtom* unwrap() { return this; }
   1877  const JSAtom* unwrap() const { return this; }
   1878 
   1879  // Should only be used to get an opaque pointer for baking into jitcode.
   1880  const js::gc::Cell* raw() const { return this; }
   1881 };
   1882 
   1883 }  // namespace js
   1884 
   1885 inline js::HashNumber JSAtom::hash() const {
   1886  if (isFatInline()) {
   1887    return static_cast<const js::FatInlineAtom*>(this)->hash();
   1888  }
   1889  return static_cast<const js::NormalAtom*>(this)->hash();
   1890 }
   1891 
   1892 namespace js {
   1893 
   1894 /*
   1895 * Represents an atomized string which does not contain an index (that is, an
   1896 * unsigned 32-bit value).  Thus for any PropertyName propname,
   1897 * ToString(ToUint32(propname)) never equals propname.
   1898 *
   1899 * To more concretely illustrate the utility of PropertyName, consider that it
   1900 * is used to partition, in a type-safe manner, the ways to refer to a
   1901 * property, as follows:
   1902 *
   1903 *   - uint32_t indexes,
   1904 *   - PropertyName strings which don't encode uint32_t indexes,
   1905 *   - Symbol, and
   1906 *   - JS::PropertyKey::isVoid.
   1907 */
   1908 class PropertyName : public JSAtom {
   1909 private:
   1910  /* Vacuous and therefore unimplemented. */
   1911  PropertyName* asPropertyName() = delete;
   1912 };
   1913 
   1914 static_assert(sizeof(PropertyName) == sizeof(JSString),
   1915              "string subclasses must be binary-compatible with JSString");
   1916 
   1917 static MOZ_ALWAYS_INLINE jsid NameToId(PropertyName* name) {
   1918  return JS::PropertyKey::NonIntAtom(name);
   1919 }
   1920 
   1921 using PropertyNameVector = JS::GCVector<PropertyName*>;
   1922 
   1923 template <typename CharT>
   1924 void CopyChars(CharT* dest, const JSLinearString& str);
   1925 
   1926 static inline UniqueChars StringToNewUTF8CharsZ(JSContext* cx, JSString& str) {
   1927  JS::AutoCheckCannotGC nogc;
   1928 
   1929  JSLinearString* linear = str.ensureLinear(cx);
   1930  if (!linear) {
   1931    return nullptr;
   1932  }
   1933 
   1934  return UniqueChars(
   1935      linear->hasLatin1Chars()
   1936          ? JS::CharsToNewUTF8CharsZ(cx, linear->latin1Range(nogc)).c_str()
   1937          : JS::CharsToNewUTF8CharsZ(cx, linear->twoByteRange(nogc)).c_str());
   1938 }
   1939 
   1940 template <typename CharT>
   1941 extern JSString::OwnedChars<CharT> AllocAtomCharsValidLength(JSContext* cx,
   1942                                                             size_t length);
   1943 
   1944 /**
   1945 * Allocate a string with the given contents.  If |allowGC == CanGC|, this may
   1946 * trigger a GC.
   1947 */
   1948 template <js::AllowGC allowGC, typename CharT>
   1949 extern JSLinearString* NewString(JSContext* cx,
   1950                                 UniquePtr<CharT[], JS::FreePolicy> chars,
   1951                                 size_t length,
   1952                                 js::gc::Heap heap = js::gc::Heap::Default);
   1953 
   1954 /* Like NewString, but doesn't try to deflate to Latin1. */
   1955 template <js::AllowGC allowGC, typename CharT>
   1956 extern JSLinearString* NewStringDontDeflate(
   1957    JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
   1958    js::gc::Heap heap = js::gc::Heap::Default);
   1959 
   1960 /* This may return a static string/atom or an inline string. */
   1961 extern JSLinearString* NewDependentString(
   1962    JSContext* cx, JSString* base, size_t start, size_t length,
   1963    js::gc::Heap heap = js::gc::Heap::Default);
   1964 
   1965 /* As above, but give an option to not contract the chain of base strings, in
   1966 order to create messier situations for testing (some of which may not be
   1967 possible in practice). */
   1968 extern JSLinearString* NewDependentStringForTesting(
   1969    JSContext* cx, JSString* base, size_t start, size_t length,
   1970    JS::ContractBaseChain contract, js::gc::Heap heap);
   1971 
   1972 /* Take ownership of an array of Latin1Chars. */
   1973 extern JSLinearString* NewLatin1StringZ(
   1974    JSContext* cx, UniqueChars chars,
   1975    js::gc::Heap heap = js::gc::Heap::Default);
   1976 
   1977 /* Copy a counted string and GC-allocate a descriptor for it. */
   1978 template <js::AllowGC allowGC, typename CharT>
   1979 extern JSLinearString* NewStringCopyN(
   1980    JSContext* cx, const CharT* s, size_t n,
   1981    js::gc::Heap heap = js::gc::Heap::Default);
   1982 
   1983 template <js::AllowGC allowGC>
   1984 inline JSLinearString* NewStringCopyN(
   1985    JSContext* cx, const char* s, size_t n,
   1986    js::gc::Heap heap = js::gc::Heap::Default) {
   1987  return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n,
   1988                                 heap);
   1989 }
   1990 
   1991 template <typename CharT>
   1992 extern JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
   1993                                                   const CharT* s, size_t n,
   1994                                                   js::HashNumber hash);
   1995 
   1996 template <typename CharT>
   1997 extern JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const CharT* s,
   1998                                                  size_t n,
   1999                                                  js::HashNumber hash);
   2000 
   2001 /* Copy a counted string and GC-allocate a descriptor for it. */
   2002 template <js::AllowGC allowGC, typename CharT>
   2003 inline JSLinearString* NewStringCopy(
   2004    JSContext* cx, mozilla::Span<const CharT> s,
   2005    js::gc::Heap heap = js::gc::Heap::Default) {
   2006  return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
   2007 }
   2008 
   2009 /* Copy a counted string and GC-allocate a descriptor for it. */
   2010 template <
   2011    js::AllowGC allowGC, typename CharT,
   2012    typename std::enable_if_t<!std::is_same_v<CharT, unsigned char>>* = nullptr>
   2013 inline JSLinearString* NewStringCopy(
   2014    JSContext* cx, std::basic_string_view<CharT> s,
   2015    js::gc::Heap heap = js::gc::Heap::Default) {
   2016  return NewStringCopyN<allowGC>(cx, s.data(), s.size(), heap);
   2017 }
   2018 
   2019 /* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
   2020 template <js::AllowGC allowGC, typename CharT>
   2021 extern JSLinearString* NewStringCopyNDontDeflate(
   2022    JSContext* cx, const CharT* s, size_t n,
   2023    js::gc::Heap heap = js::gc::Heap::Default);
   2024 
   2025 template <js::AllowGC allowGC, typename CharT>
   2026 extern JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
   2027    JSContext* cx, const CharT* s, size_t n,
   2028    js::gc::Heap heap = js::gc::Heap::Default);
   2029 
   2030 /* Copy a C string and GC-allocate a descriptor for it. */
   2031 template <js::AllowGC allowGC>
   2032 inline JSLinearString* NewStringCopyZ(
   2033    JSContext* cx, const char16_t* s,
   2034    js::gc::Heap heap = js::gc::Heap::Default) {
   2035  return NewStringCopyN<allowGC>(cx, s, js_strlen(s), heap);
   2036 }
   2037 
   2038 template <js::AllowGC allowGC>
   2039 inline JSLinearString* NewStringCopyZ(
   2040    JSContext* cx, const char* s, js::gc::Heap heap = js::gc::Heap::Default) {
   2041  return NewStringCopyN<allowGC>(cx, s, strlen(s), heap);
   2042 }
   2043 
   2044 extern JSLinearString* NewStringCopyUTF8N(
   2045    JSContext* cx, const JS::UTF8Chars& utf8, JS::SmallestEncoding encoding,
   2046    js::gc::Heap heap = js::gc::Heap::Default);
   2047 
   2048 extern JSLinearString* NewStringCopyUTF8N(
   2049    JSContext* cx, const JS::UTF8Chars& utf8,
   2050    js::gc::Heap heap = js::gc::Heap::Default);
   2051 
   2052 inline JSLinearString* NewStringCopyUTF8Z(
   2053    JSContext* cx, const JS::ConstUTF8CharsZ utf8,
   2054    js::gc::Heap heap = js::gc::Heap::Default) {
   2055  return NewStringCopyUTF8N(
   2056      cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())), heap);
   2057 }
   2058 
   2059 template <typename CharT>
   2060 JSString* NewMaybeExternalString(JSContext* cx, const CharT* s, size_t n,
   2061                                 const JSExternalStringCallbacks* callbacks,
   2062                                 bool* allocatedExternal,
   2063                                 js::gc::Heap heap = js::gc::Heap::Default);
   2064 
   2065 static_assert(sizeof(HashNumber) == 4);
   2066 
   2067 template <AllowGC allowGC>
   2068 extern JSString* ConcatStrings(
   2069    JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
   2070    typename MaybeRooted<JSString*, allowGC>::HandleType right,
   2071    js::gc::Heap heap = js::gc::Heap::Default);
   2072 
   2073 /*
   2074 * Test if strings are equal. The caller can call the function even if str1
   2075 * or str2 are not GC-allocated things.
   2076 */
   2077 extern bool EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
   2078                         bool* result);
   2079 
   2080 /* Use the infallible method instead! */
   2081 extern bool EqualStrings(JSContext* cx, JSLinearString* str1,
   2082                         JSLinearString* str2, bool* result) = delete;
   2083 
   2084 /* EqualStrings is infallible on linear strings. */
   2085 extern bool EqualStrings(const JSLinearString* str1,
   2086                         const JSLinearString* str2);
   2087 
   2088 /**
   2089 * Compare two strings that are known to be the same length.
   2090 * Exposed for the JITs; for ordinary uses, EqualStrings() is more sensible.
   2091 *
   2092 * The caller must have checked for the following cases that can be handled
   2093 * efficiently without requiring a character comparison:
   2094 *   - str1 == str2
   2095 *   - str1->length() != str2->length()
   2096 *   - str1->isAtom() && str2->isAtom()
   2097 */
   2098 extern bool EqualChars(const JSLinearString* str1, const JSLinearString* str2);
   2099 
   2100 /*
   2101 * Return less than, equal to, or greater than zero depending on whether
   2102 * `s1[0..len1]` is less than, equal to, or greater than `s2`.
   2103 */
   2104 extern int32_t CompareChars(const char16_t* s1, size_t len1,
   2105                            const JSLinearString* s2);
   2106 
   2107 /*
   2108 * Compare two strings, like CompareChars, but store the result in `*result`.
   2109 * This flattens the strings and therefore can fail.
   2110 */
   2111 extern bool CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
   2112                           int32_t* result);
   2113 
   2114 /*
   2115 * Compare two strings, like CompareChars.
   2116 */
   2117 extern int32_t CompareStrings(const JSLinearString* str1,
   2118                              const JSLinearString* str2);
   2119 
   2120 /*
   2121 * Compare two strings, like CompareChars. Can be called off-thread.
   2122 */
   2123 extern int32_t CompareStrings(const JSOffThreadAtom* str1,
   2124                              const JSOffThreadAtom* str2);
   2125 
   2126 /**
   2127 * Return true if the string contains only ASCII characters.
   2128 */
   2129 extern bool StringIsAscii(const JSLinearString* str);
   2130 
   2131 /*
   2132 * Return true if the string matches the given sequence of ASCII bytes.
   2133 */
   2134 extern bool StringEqualsAscii(const JSLinearString* str,
   2135                              const char* asciiBytes);
   2136 /*
   2137 * Return true if the string matches the given sequence of ASCII
   2138 * bytes.  The sequence of ASCII bytes must have length "length".  The
   2139 * length should not include the trailing null, if any.
   2140 */
   2141 extern bool StringEqualsAscii(const JSLinearString* str, const char* asciiBytes,
   2142                              size_t length);
   2143 
   2144 template <size_t N>
   2145 bool StringEqualsLiteral(const JSLinearString* str,
   2146                         const char (&asciiBytes)[N]) {
   2147  MOZ_ASSERT(asciiBytes[N - 1] == '\0');
   2148  return StringEqualsAscii(str, asciiBytes, N - 1);
   2149 }
   2150 
   2151 extern int StringFindPattern(const JSLinearString* text,
   2152                             const JSLinearString* pat, size_t start);
   2153 
   2154 /**
   2155 * Return true if the string contains a pattern at |start|.
   2156 *
   2157 * Precondition: `text` is long enough that this might be true;
   2158 * that is, it has at least `start + pat->length()` characters.
   2159 */
   2160 extern bool HasSubstringAt(const JSLinearString* text,
   2161                           const JSLinearString* pat, size_t start);
   2162 
   2163 /*
   2164 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
   2165 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
   2166 * and constitute API misuse.
   2167 */
   2168 JSString* SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
   2169                          int32_t lengthInt);
   2170 
   2171 inline js::HashNumber HashStringChars(const JSLinearString* str) {
   2172  JS::AutoCheckCannotGC nogc;
   2173  size_t len = str->length();
   2174  return str->hasLatin1Chars()
   2175             ? mozilla::HashString(str->latin1Chars(nogc), len)
   2176             : mozilla::HashString(str->twoByteChars(nogc), len);
   2177 }
   2178 
   2179 /**
   2180 * Allocate string characters when the final string length is known in advance.
   2181 */
   2182 template <typename CharT>
   2183 class MOZ_NON_PARAM StringChars {
   2184  static constexpr size_t InlineLength =
   2185      std::is_same_v<CharT, JS::Latin1Char>
   2186          ? JSFatInlineString::MAX_LENGTH_LATIN1
   2187          : JSFatInlineString::MAX_LENGTH_TWO_BYTE;
   2188 
   2189  CharT inlineChars_[InlineLength];
   2190  Rooted<JSString::OwnedChars<CharT>> ownedChars_;
   2191 
   2192 #ifdef DEBUG
   2193  // In debug mode, we keep track of the requested string lengths to ensure all
   2194  // methods are called in the correct order and with the expected argument
   2195  // values.
   2196  size_t lastRequestedLength_ = 0;
   2197 
   2198  void assertValidRequest(size_t expectedLastLength, size_t length) {
   2199    MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length");
   2200    MOZ_ASSERT(lastRequestedLength_ == expectedLastLength);
   2201    lastRequestedLength_ = length;
   2202  }
   2203 #else
   2204  void assertValidRequest(size_t expectedLastLength, size_t length) {}
   2205 #endif
   2206 
   2207 public:
   2208  explicit StringChars(JSContext* cx) : ownedChars_(cx) {}
   2209 
   2210  /**
   2211   * Return a raw pointer to the string characters. The pointer can point to
   2212   * nursery allocated memory, so the caller should ensure no GC can happen
   2213   * while using this pointer.
   2214   */
   2215  CharT* data(const JS::AutoRequireNoGC&) {
   2216    return ownedChars_ ? ownedChars_.data() : inlineChars_;
   2217  }
   2218 
   2219  /**
   2220   * Escape hatch when it's not possible to call `data(nogc)`. Use with caution!
   2221   */
   2222  CharT* unsafeData() {
   2223    return ownedChars_ ? ownedChars_.data() : inlineChars_;
   2224  }
   2225 
   2226  /**
   2227   * Prepare for writing |length| characters. Allocates iff `length` exceeds the
   2228   * inline storage of this class.
   2229   */
   2230  bool maybeAlloc(JSContext* cx, size_t length,
   2231                  gc::Heap heap = gc::Heap::Default);
   2232 
   2233  /**
   2234   * Increase the string characters storage. Allocates iff `newLength` exceeds
   2235   * the inline storage of this class.
   2236   */
   2237  bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength,
   2238                    gc::Heap heap = gc::Heap::Default);
   2239 
   2240  /**
   2241   * Build the result string. Does not deflate two-byte characters if all
   2242   * characters fit into Latin-1.
   2243   */
   2244  template <AllowGC allowGC>
   2245  JSLinearString* toStringDontDeflate(JSContext* cx, size_t length,
   2246                                      gc::Heap heap = gc::Heap::Default);
   2247 
   2248  /**
   2249   * Build the result string. Does not deflate two-byte characters if all
   2250   * characters fit into Latin-1. And does not check static strings.
   2251   */
   2252  template <AllowGC allowGC>
   2253  JSLinearString* toStringDontDeflateNonStatic(
   2254      JSContext* cx, size_t length, gc::Heap heap = gc::Heap::Default);
   2255 };
   2256 
   2257 /**
   2258 * Allocate atom characters when the final string length is known in advance.
   2259 */
   2260 template <typename CharT>
   2261 class MOZ_NON_PARAM AtomStringChars {
   2262  static constexpr size_t InlineLength =
   2263      std::is_same_v<CharT, JS::Latin1Char>
   2264          ? JSFatInlineString::MAX_LENGTH_LATIN1
   2265          : JSFatInlineString::MAX_LENGTH_TWO_BYTE;
   2266 
   2267  CharT inlineChars_[InlineLength];
   2268  UniquePtr<CharT[], JS::FreePolicy> mallocChars_;
   2269 
   2270 #ifdef DEBUG
   2271  // In debug mode, we keep track of the requested string lengths to ensure all
   2272  // methods are called in the correct order and with the expected argument
   2273  // values.
   2274  size_t lastRequestedLength_ = 0;
   2275 
   2276  void assertValidRequest(size_t expectedLastLength, size_t length) {
   2277    MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length");
   2278    MOZ_ASSERT(lastRequestedLength_ == expectedLastLength);
   2279    lastRequestedLength_ = length;
   2280  }
   2281 #else
   2282  void assertValidRequest(size_t expectedLastLength, size_t length) {}
   2283 #endif
   2284 
   2285 public:
   2286  /**
   2287   * Return a raw pointer to the string characters.
   2288   */
   2289  CharT* data() { return mallocChars_ ? mallocChars_.get() : inlineChars_; }
   2290 
   2291  /**
   2292   * Prepare for writing |length| characters. Allocates iff `length` exceeds the
   2293   * inline storage of this class.
   2294   */
   2295  bool maybeAlloc(JSContext* cx, size_t length);
   2296 
   2297  /**
   2298   * Build the result atom string.
   2299   */
   2300  JSAtom* toAtom(JSContext* cx, size_t length);
   2301 };
   2302 
   2303 /*** Conversions ************************************************************/
   2304 
/*
 * Convert a string to a printable C string.
 *
 * Asserts if the input contains any non-ASCII characters.
 *
 * The characters are returned in a UniqueChars.
 * NOTE(review): presumably null on failure (e.g. OOM) -- confirm at the
 * definition.
 */
UniqueChars EncodeAscii(JSContext* cx, JSString* str);
   2311 
/*
 * Convert a string to a printable C string.
 *
 * The characters are returned in a UniqueChars.
 * NOTE(review): judging by the name the result is Latin-1 encoded; how
 * characters outside Latin-1 are handled, and whether null is returned on
 * failure, must be confirmed at the definition.
 */
UniqueChars EncodeLatin1(JSContext* cx, JSString* str);
   2316 
/*
 * Selects which printable representation IdToPrintableUTF8 is asked to
 * produce. The underlying type is bool, so exactly these two choices exist.
 */
enum class IdToPrintableBehavior : bool {
  /*
   * Request the printable representation of an identifier.
   */
  IdIsIdentifier,

  /*
   * Request the printable representation of a property key.
   */
  IdIsPropertyKey
};
   2328 
/*
 * Convert a jsid to a printable C string encoded in UTF-8.
 *
 * |behavior| chooses between the identifier and the property-key
 * representation (see IdToPrintableBehavior).
 * NOTE(review): presumably returns null on failure -- confirm at the
 * definition.
 */
extern UniqueChars IdToPrintableUTF8(JSContext* cx, HandleId id,
                                     IdToPrintableBehavior behavior);
   2334 
/*
 * Convert a non-string value to a string, returning null after reporting an
 * error, otherwise returning a new string reference.
 *
 * The handle type of |arg| is chosen by |allowGC| through MaybeRooted.
 * ToString() forwards to this function for values that are not already
 * strings.
 */
template <AllowGC allowGC>
extern JSString* ToStringSlow(
    JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg);
   2342 
   2343 /*
   2344 * Convert the given value to a string.  This method includes an inline
   2345 * fast-path for the case where the value is already a string; if the value is
   2346 * known not to be a string, use ToStringSlow instead.
   2347 */
   2348 template <AllowGC allowGC>
   2349 static MOZ_ALWAYS_INLINE JSString* ToString(JSContext* cx, JS::HandleValue v) {
   2350  if (v.isString()) {
   2351    return v.toString();
   2352  }
   2353  return ToStringSlow<allowGC>(cx, v);
   2354 }
   2355 
/*
 * This function implements E-262-3 section 9.8, toString. Convert the given
 * value to a string of characters appended to the given builder. On error, the
 * passed builder may have partial results appended.
 *
 * Declared inline here; the definition is not part of this section of the
 * header.
 * NOTE(review): presumably returns false on error -- confirm at the
 * definition.
 */
inline bool ValueToStringBuilder(JSContext* cx, const Value& v,
                                 StringBuilder& sb);
   2363 
   2364 } /* namespace js */
   2365 
   2366 MOZ_ALWAYS_INLINE bool JSString::getChar(JSContext* cx, size_t index,
   2367                                         char16_t* code) {
   2368  MOZ_ASSERT(index < length());
   2369 
   2370  /*
   2371   * Optimization for one level deep ropes.
   2372   * This is common for the following pattern:
   2373   *
   2374   * while() {
   2375   *   text = text.substr(0, x) + "bla" + text.substr(x)
   2376   *   test.charCodeAt(x + 1)
   2377   * }
   2378   *
   2379   * Note: keep this in sync with MacroAssembler::loadStringChar and
   2380   * CanAttachStringChar.
   2381   */
   2382  JSString* str;
   2383  if (isRope()) {
   2384    JSRope* rope = &asRope();
   2385    if (uint32_t(index) < rope->leftChild()->length()) {
   2386      str = rope->leftChild();
   2387    } else {
   2388      str = rope->rightChild();
   2389      index -= rope->leftChild()->length();
   2390    }
   2391  } else {
   2392    str = this;
   2393  }
   2394 
   2395  if (!str->ensureLinear(cx)) {
   2396    return false;
   2397  }
   2398 
   2399  *code = str->asLinear().latin1OrTwoByteChar(index);
   2400  return true;
   2401 }
   2402 
   2403 MOZ_ALWAYS_INLINE bool JSString::getCodePoint(JSContext* cx, size_t index,
   2404                                              char32_t* code) {
   2405  // C++ implementation of https://tc39.es/ecma262/#sec-codepointat
   2406  size_t size = length();
   2407  MOZ_ASSERT(index < size);
   2408 
   2409  char16_t first;
   2410  if (!getChar(cx, index, &first)) {
   2411    return false;
   2412  }
   2413  if (!js::unicode::IsLeadSurrogate(first) || index + 1 == size) {
   2414    *code = first;
   2415    return true;
   2416  }
   2417 
   2418  char16_t second;
   2419  if (!getChar(cx, index + 1, &second)) {
   2420    return false;
   2421  }
   2422  if (!js::unicode::IsTrailSurrogate(second)) {
   2423    *code = first;
   2424    return true;
   2425  }
   2426 
   2427  *code = js::unicode::UTF16Decode(first, second);
   2428  return true;
   2429 }
   2430 
   2431 MOZ_ALWAYS_INLINE JSLinearString* JSString::ensureLinear(JSContext* cx) {
   2432  return isLinear() ? &asLinear() : asRope().flatten(cx);
   2433 }
   2434 
   2435 inline JSLinearString* JSString::base() const {
   2436  MOZ_ASSERT(hasBase());
   2437  MOZ_ASSERT_IF(!isAtomRef(), !d.s.u3.base->isInline());
   2438  MOZ_ASSERT(d.s.u3.base->assertIsValidBase());
   2439  if (isAtomRef()) {
   2440    return static_cast<JSLinearString*>(d.s.u3.atom);
   2441  }
   2442  return d.s.u3.base;
   2443 }
   2444 
   2445 inline JSAtom* JSString::atom() const {
   2446  MOZ_ASSERT(isAtomRef());
   2447  return d.s.u3.atom;
   2448 }
   2449 
   2450 inline JSLinearString* JSString::nurseryBaseOrRelocOverlay() const {
   2451  MOZ_ASSERT(hasBase());
   2452  return d.s.u3.base;
   2453 }
   2454 
   2455 inline bool JSString::canOwnDependentChars() const {
   2456  // A string that could own the malloced chars used by another (dependent)
   2457  // string. It will not have a base and must be linear and non-inline.
   2458  return isLinear() && !isInline() && !hasBase();
   2459 }
   2460 
   2461 template <>
   2462 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::nonInlineChars(
   2463    const JS::AutoRequireNoGC& nogc) const {
   2464  return nonInlineTwoByteChars(nogc);
   2465 }
   2466 
   2467 template <>
   2468 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::nonInlineChars(
   2469    const JS::AutoRequireNoGC& nogc) const {
   2470  return nonInlineLatin1Chars(nogc);
   2471 }
   2472 
   2473 template <>
   2474 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::chars(
   2475    const JS::AutoRequireNoGC& nogc) const {
   2476  return rawTwoByteChars();
   2477 }
   2478 
   2479 template <>
   2480 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::chars(
   2481    const JS::AutoRequireNoGC& nogc) const {
   2482  return rawLatin1Chars();
   2483 }
   2484 
   2485 template <>
   2486 MOZ_ALWAYS_INLINE js::UniquePtr<JS::Latin1Char[], JS::FreePolicy>
   2487 JSRope::copyChars<JS::Latin1Char>(JSContext* maybecx,
   2488                                  arena_id_t destArenaId) const {
   2489  return copyLatin1Chars(maybecx, destArenaId);
   2490 }
   2491 
   2492 template <>
   2493 MOZ_ALWAYS_INLINE JS::UniqueTwoByteChars JSRope::copyChars<char16_t>(
   2494    JSContext* maybecx, arena_id_t destArenaId) const {
   2495  return copyTwoByteChars(maybecx, destArenaId);
   2496 }
   2497 
   2498 template <>
   2499 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<JS::Latin1Char>(
   2500    size_t length) {
   2501  return length <= MAX_LENGTH_LATIN1;
   2502 }
   2503 
   2504 template <>
   2505 MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits<char16_t>(size_t length) {
   2506  return length <= MAX_LENGTH_TWO_BYTE;
   2507 }
   2508 
   2509 template <>
   2510 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<JS::Latin1Char>(
   2511    size_t length) {
   2512  static_assert(
   2513      (INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellAlignBytes ==
   2514          0,
   2515      "fat inline strings' Latin1 characters don't exactly "
   2516      "fill subsequent cells and thus are wasteful");
   2517  static_assert(MAX_LENGTH_LATIN1 ==
   2518                    (sizeof(JSFatInlineString) -
   2519                     offsetof(JSFatInlineString, d.inlineStorageLatin1)) /
   2520                        sizeof(char),
   2521                "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
   2522                "storage count");
   2523 
   2524  return length <= MAX_LENGTH_LATIN1;
   2525 }
   2526 
   2527 template <>
   2528 MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits<char16_t>(size_t length) {
   2529  static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) %
   2530                        js::gc::CellAlignBytes ==
   2531                    0,
   2532                "fat inline strings' char16_t characters don't exactly "
   2533                "fill subsequent cells and thus are wasteful");
   2534  static_assert(MAX_LENGTH_TWO_BYTE ==
   2535                    (sizeof(JSFatInlineString) -
   2536                     offsetof(JSFatInlineString, d.inlineStorageTwoByte)) /
   2537                        sizeof(char16_t),
   2538                "MAX_LENGTH_TWO_BYTE must be one less than inline "
   2539                "char16_t storage count");
   2540 
   2541  return length <= MAX_LENGTH_TWO_BYTE;
   2542 }
   2543 
   2544 template <>
   2545 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<JS::Latin1Char>(
   2546    size_t length) {
   2547  // If it fits in a fat inline string, it fits in any inline string.
   2548  return JSFatInlineString::lengthFits<JS::Latin1Char>(length);
   2549 }
   2550 
   2551 template <>
   2552 MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits<char16_t>(size_t length) {
   2553  // If it fits in a fat inline string, it fits in any inline string.
   2554  return JSFatInlineString::lengthFits<char16_t>(length);
   2555 }
   2556 
   2557 template <>
   2558 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<JS::Latin1Char>(
   2559    size_t length) {
   2560  return length <= MAX_LENGTH_LATIN1;
   2561 }
   2562 
   2563 template <>
   2564 MOZ_ALWAYS_INLINE bool js::ThinInlineAtom::lengthFits<char16_t>(size_t length) {
   2565  return length <= MAX_LENGTH_TWO_BYTE;
   2566 }
   2567 
   2568 template <>
   2569 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<JS::Latin1Char>(
   2570    size_t length) {
   2571  return length <= MAX_LENGTH_LATIN1;
   2572 }
   2573 
   2574 template <>
   2575 MOZ_ALWAYS_INLINE bool js::FatInlineAtom::lengthFits<char16_t>(size_t length) {
   2576  return length <= MAX_LENGTH_TWO_BYTE;
   2577 }
   2578 
   2579 template <>
   2580 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<JS::Latin1Char>(size_t length) {
   2581  // If it fits in a fat inline atom, it fits in any inline atom.
   2582  return js::FatInlineAtom::lengthFits<JS::Latin1Char>(length);
   2583 }
   2584 
   2585 template <>
   2586 MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<char16_t>(size_t length) {
   2587  // If it fits in a fat inline atom, it fits in any inline atom.
   2588  return js::FatInlineAtom::lengthFits<char16_t>(length);
   2589 }
   2590 
   2591 template <>
   2592 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const char16_t* chars,
   2593                                                   bool usesStringBuffer) {
   2594  // Check that the new buffer is located in the StringBufferArena
   2595  if (!(isAtomRef() && atom()->isInline())) {
   2596    checkStringCharsArena(chars, usesStringBuffer);
   2597  }
   2598  d.s.u2.nonInlineCharsTwoByte = chars;
   2599 }
   2600 
   2601 template <>
   2602 MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const JS::Latin1Char* chars,
   2603                                                   bool usesStringBuffer) {
   2604  // Check that the new buffer is located in the StringBufferArena
   2605  if (!(isAtomRef() && atom()->isInline())) {
   2606    checkStringCharsArena(chars, usesStringBuffer);
   2607  }
   2608  d.s.u2.nonInlineCharsLatin1 = chars;
   2609 }
   2610 
   2611 MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::rawLatin1Chars() const {
   2612  MOZ_ASSERT(JSString::isLinear());
   2613  MOZ_ASSERT(hasLatin1Chars());
   2614  return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
   2615 }
   2616 
   2617 MOZ_ALWAYS_INLINE const char16_t* JSLinearString::rawTwoByteChars() const {
   2618  MOZ_ASSERT(JSString::isLinear());
   2619  MOZ_ASSERT(hasTwoByteChars());
   2620  return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
   2621 }
   2622 
   2623 inline js::PropertyName* JSAtom::asPropertyName() {
   2624  MOZ_ASSERT(!isIndex());
   2625  return static_cast<js::PropertyName*>(this);
   2626 }
   2627 
   2628 inline bool JSLinearString::isIndex(uint32_t* indexp) const {
   2629  MOZ_ASSERT(JSString::isLinear());
   2630 
   2631  if (isAtom()) {
   2632    return asAtom().isIndex(indexp);
   2633  }
   2634 
   2635  if (JSString::hasIndexValue()) {
   2636    *indexp = getIndexValue();
   2637    return true;
   2638  }
   2639 
   2640  return isIndexSlow(indexp);
   2641 }
   2642 
   2643 namespace js {
   2644 namespace gc {
   2645 template <>
   2646 inline JSString* Cell::as<JSString>() {
   2647  MOZ_ASSERT(is<JSString>());
   2648  return reinterpret_cast<JSString*>(this);
   2649 }
   2650 
   2651 template <>
   2652 inline JSString* TenuredCell::as<JSString>() {
   2653  MOZ_ASSERT(is<JSString>());
   2654  return reinterpret_cast<JSString*>(this);
   2655 }
   2656 
   2657 }  // namespace gc
   2658 }  // namespace js
   2659 
   2660 #endif /* vm_StringType_h */