tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ParserAtom.h (30946B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef frontend_ParserAtom_h
      8 #define frontend_ParserAtom_h
      9 
     10 #include "mozilla/MemoryReporting.h"  // mozilla::MallocSizeOf
     11 #include "mozilla/Range.h"            // mozilla::Range
     12 #include "mozilla/Span.h"             // mozilla::Span
     13 #include "mozilla/TextUtils.h"
     14 
     15 #include <stddef.h>
     16 #include <stdint.h>
     17 
     18 #include "jstypes.h"
     19 #include "NamespaceImports.h"
     20 
     21 #include "frontend/TypedIndex.h"  // TypedIndex
     22 #include "js/HashTable.h"         // HashMap
     23 #include "js/ProtoKey.h"          // JS_FOR_EACH_PROTOTYPE
     24 #include "js/Symbol.h"            // JS_FOR_EACH_WELL_KNOWN_SYMBOL
     25 #include "js/TypeDecls.h"         // Latin1Char
     26 #include "js/Utility.h"           // UniqueChars
     27 #include "js/Vector.h"            // Vector
     28 #include "threading/Mutex.h"      // Mutex
     29 #include "util/Text.h"            // InflatedChar16Sequence
     30 #include "vm/CommonPropertyNames.h"
     31 #include "vm/StaticStrings.h"
     32 #include "vm/WellKnownAtom.h"  // WellKnownAtomId, WellKnownAtomInfo
     33 
     34 struct JS_PUBLIC_API JSContext;
     35 
     36 class JSAtom;
     37 class JSString;
     38 
     39 namespace mozilla {
     40 union Utf8Unit;
     41 }
     42 
     43 namespace js {
     44 
     45 class AtomSet;
     46 class JS_PUBLIC_API GenericPrinter;
     47 class LifoAlloc;
     48 class StringBuilder;
     49 
     50 namespace frontend {
     51 
     52 struct CompilationAtomCache;
     53 struct CompilationStencil;
     54 
     55 template <typename CharT>
     56 class SpecificParserAtomLookup;
     57 
     58 // These types correspond to indices in the StaticStrings arrays.
     59 enum class Length1StaticParserString : uint8_t;
     60 enum class Length2StaticParserString : uint16_t;
     61 enum class Length3StaticParserString : uint8_t;
     62 
     63 class ParserAtom;
     64 using ParserAtomIndex = TypedIndex<ParserAtom>;
     65 
     66 // ParserAtomIndex, WellKnownAtomId, Length1StaticParserString,
     67 // Length2StaticParserString, Length3StaticParserString, or null.
     68 //
        // Every alternative is packed into one uint32_t. Going by the constants
        // below and the table that follows, the top TagBit (4) bits select the
        // Kind and the low IndexBit (28) bits carry the payload; for
        // Kind::WellKnown, SubTagBit (2) bits at SubTagShift (16) select the
        // flavour and the low SmallIndexBit (16) bits carry the index.
        // (BitMask() and Bit() are defined outside this file.)
        //
     69 // 0x0000_0000  Null atom
     70 //
     71 // 0x1YYY_YYYY  28-bit ParserAtom
     72 //
     73 // 0x2000_YYYY  Well-known atom ID
     74 // 0x2001_YYYY  Static length-1 atom : whole Latin1 range
     75 // 0x2002_YYYY  Static length-2 atom : `[A-Za-z0-9$_]{2}`
     76 // 0x2003_YYYY  Static length-3 atom : decimal "100" to "255"
     77 class TaggedParserAtomIndex {
         // Encoded tag + payload; zero is the null encoding (see isNull()).
     78  uint32_t data_;
     79 
     80 public:
         // Payload width/mask, and the position/width/mask of the Kind tag, which
         // starts at the bit directly above the payload (TagShift == IndexBit).
     81  static constexpr size_t IndexBit = 28;
     82  static constexpr size_t IndexMask = BitMask(IndexBit);
     83 
     84  static constexpr size_t TagShift = IndexBit;
     85  static constexpr size_t TagBit = 4;
     86  static constexpr size_t TagMask = BitMask(TagBit) << TagShift;
     87 
         // Top-level discriminator kept in the tag bits. The static length-1/2/3
         // strings have no Kind of their own: they share Kind::WellKnown and are
         // told apart by the sub-tag.
     88  enum class Kind : uint32_t {
     89    Null = 0,
     90    ParserAtomIndex,
     91    WellKnown,
     92  };
     93 
     94 private:
         // Layout used within Kind::WellKnown: a small index with a sub-tag
         // starting at the bit directly above it (SubTagShift == SmallIndexBit).
     95  static constexpr size_t SmallIndexBit = 16;
     96  static constexpr size_t SmallIndexMask = BitMask(SmallIndexBit);
     97 
     98  static constexpr size_t SubTagShift = SmallIndexBit;
     99  static constexpr size_t SubTagBit = 2;
    100  static constexpr size_t SubTagMask = BitMask(SubTagBit) << SubTagShift;
    101 
    102 public:
         // Kind values pre-shifted into tag position, ready to be OR-ed into or
         // compared against data_.
    103  static constexpr uint32_t NullTag = uint32_t(Kind::Null) << TagShift;
    104  static constexpr uint32_t ParserAtomIndexTag = uint32_t(Kind::ParserAtomIndex)
    105                                                 << TagShift;
    106  static constexpr uint32_t WellKnownTag = uint32_t(Kind::WellKnown)
    107                                           << TagShift;
    108 
    109 private:
         // Sub-tag values pre-shifted into position, one per WellKnown flavour.
    110  static constexpr uint32_t WellKnownSubTag = 0 << SubTagShift;
    111  static constexpr uint32_t Length1StaticSubTag = 1 << SubTagShift;
    112  static constexpr uint32_t Length2StaticSubTag = 2 << SubTagShift;
    113  static constexpr uint32_t Length3StaticSubTag = 3 << SubTagShift;
    114 
    115 public:
         // Exclusive upper bounds for the two payload widths; the ParserAtomIndex
         // and WellKnownAtomId constructors assert against them.
    116  static constexpr uint32_t IndexLimit = Bit(IndexBit);
    117  static constexpr uint32_t SmallIndexLimit = Bit(SmallIndexBit);
    118 
         // Limits for the static-string payloads. Length2StaticLimit mirrors
         // StaticStrings::NUM_LENGTH2_ENTRIES.
    119  static constexpr size_t Length1StaticLimit = 256U;
    120  static constexpr size_t Length2StaticLimit =
    121      StaticStrings::NUM_LENGTH2_ENTRIES;
    122  static constexpr size_t Length3StaticLimit = 256U;
    123 
    124 private:
         // Wraps an already-encoded value as-is, with no checking of its own;
         // used by fromRaw().
    125  explicit TaggedParserAtomIndex(uint32_t data) : data_(data) {}
    126 
    127 public:
         // Default construction yields the null index.
    128  constexpr TaggedParserAtomIndex() : data_(NullTag) {}
    129 
         // Converting constructors: each ORs its payload with the matching tag
         // (and sub-tag). All are explicit, so the payload kind is always spelled
         // out at the call site. Only the first two assert a range on the payload;
         // the static-string constructors do not range-check their argument.
    130  explicit constexpr TaggedParserAtomIndex(ParserAtomIndex index)
    131      : data_(index.index | ParserAtomIndexTag) {
    132    MOZ_ASSERT(index.index < IndexLimit);
    133  }
    134  explicit constexpr TaggedParserAtomIndex(WellKnownAtomId index)
    135      : data_(uint32_t(index) | WellKnownTag | WellKnownSubTag) {
    136    MOZ_ASSERT(uint32_t(index) < SmallIndexLimit);
    137 
    138    // Length1Static/Length2Static string shouldn't use WellKnownAtomId.
    139 #define CHECK_(NAME, _) MOZ_ASSERT(index != WellKnownAtomId::NAME);
    140    FOR_EACH_NON_EMPTY_TINY_PROPERTYNAME(CHECK_)
    141 #undef CHECK_
    142  }
    143  explicit constexpr TaggedParserAtomIndex(Length1StaticParserString index)
    144      : data_(uint32_t(index) | WellKnownTag | Length1StaticSubTag) {}
    145  explicit constexpr TaggedParserAtomIndex(Length2StaticParserString index)
    146      : data_(uint32_t(index) | WellKnownTag | Length2StaticSubTag) {}
    147  explicit constexpr TaggedParserAtomIndex(Length3StaticParserString index)
    148      : data_(uint32_t(index) | WellKnownTag | Length3StaticSubTag) {}
    149 
         // Named constexpr factories, one static method per entry of the X-macro
         // lists expanded below, plus empty(). Entries from the first three lists
         // are built from a WellKnownAtomId; the LENGTH1/LENGTH2 lists are built
         // from the characters of STR as static length-1/length-2 strings.
    150  class WellKnown {
    151   public:
    152 #define METHOD_(NAME, _)                                 \
    153  static constexpr TaggedParserAtomIndex NAME() {        \
    154    return TaggedParserAtomIndex(WellKnownAtomId::NAME); \
    155  }
    156    FOR_EACH_NONTINY_COMMON_PROPERTYNAME(METHOD_)
    157 #undef METHOD_
    158 
    159 #define METHOD_(NAME, _)                                 \
    160  static constexpr TaggedParserAtomIndex NAME() {        \
    161    return TaggedParserAtomIndex(WellKnownAtomId::NAME); \
    162  }
    163    JS_FOR_EACH_PROTOTYPE(METHOD_)
    164 #undef METHOD_
    165 
    166 #define METHOD_(NAME)                                    \
    167  static constexpr TaggedParserAtomIndex NAME() {        \
    168    return TaggedParserAtomIndex(WellKnownAtomId::NAME); \
    169  }
    170    JS_FOR_EACH_WELL_KNOWN_SYMBOL(METHOD_)
    171 #undef METHOD_
    172 
    173 #define METHOD_(NAME, STR)                                             \
    174  static constexpr TaggedParserAtomIndex NAME() {                      \
    175    return TaggedParserAtomIndex(Length1StaticParserString((STR)[0])); \
    176  }
    177    FOR_EACH_LENGTH1_PROPERTYNAME(METHOD_)
    178 #undef METHOD_
    179 
    180 #define METHOD_(NAME, STR)                                            \
    181  static constexpr TaggedParserAtomIndex NAME() {                     \
    182    return TaggedParserAtomIndex(Length2StaticParserString(           \
    183        (StaticStrings::getLength2IndexStatic((STR)[0], (STR)[1])))); \
    184  }
    185    FOR_EACH_LENGTH2_PROPERTYNAME(METHOD_)
    186 #undef METHOD_
    187 
    188    static constexpr TaggedParserAtomIndex empty() {
    189      return TaggedParserAtomIndex(WellKnownAtomId::empty_);
    190    }
    191  };
    192 
    193  // The value of rawData() for WellKnown TaggedParserAtomIndex.
    194  // For using in switch-case.
         // Each method ORs the same payload, tag and sub-tag as the constructor
         // used by the same-named WellKnown factory, but yields a plain uint32_t.
    195  class WellKnownRawData {
    196   public:
    197 #define METHOD_(NAME, _)                                                     \
    198  static constexpr uint32_t NAME() {                                         \
    199    return uint32_t(WellKnownAtomId::NAME) | WellKnownTag | WellKnownSubTag; \
    200  }
    201    FOR_EACH_NONTINY_COMMON_PROPERTYNAME(METHOD_)
    202 #undef METHOD_
    203 
    204 #define METHOD_(NAME, _)                                                     \
    205  static constexpr uint32_t NAME() {                                         \
    206    return uint32_t(WellKnownAtomId::NAME) | WellKnownTag | WellKnownSubTag; \
    207  }
    208    JS_FOR_EACH_PROTOTYPE(METHOD_)
    209 #undef METHOD_
    210 
    211 #define METHOD_(NAME)                                                        \
    212  static constexpr uint32_t NAME() {                                         \
    213    return uint32_t(WellKnownAtomId::NAME) | WellKnownTag | WellKnownSubTag; \
    214  }
    215    JS_FOR_EACH_WELL_KNOWN_SYMBOL(METHOD_)
    216 #undef METHOD_
    217 
    218 #define METHOD_(NAME, STR)                                          \
    219  static constexpr uint32_t NAME() {                                \
    220    return uint32_t((STR)[0]) | WellKnownTag | Length1StaticSubTag; \
    221  }
    222    FOR_EACH_LENGTH1_PROPERTYNAME(METHOD_)
    223 #undef METHOD_
    224 
    225 #define METHOD_(NAME, STR)                                                 \
    226  static constexpr uint32_t NAME() {                                       \
    227    return uint32_t(                                                       \
    228               StaticStrings::getLength2IndexStatic((STR)[0], (STR)[1])) | \
    229           WellKnownTag | Length2StaticSubTag;                             \
    230  }
    231    FOR_EACH_LENGTH2_PROPERTYNAME(METHOD_)
    232 #undef METHOD_
    233 
    234    static constexpr uint32_t empty() {
    235      return uint32_t(WellKnownAtomId::empty_) | WellKnownTag | WellKnownSubTag;
    236    }
    237  };
    238 
    239  // NOTE: this is not well-known "null".
         // It is the default-constructed value, i.e. the Null encoding.
    240  static TaggedParserAtomIndex null() { return TaggedParserAtomIndex(); }
    241 
         // Debug-only check of data_; only declared here.
    242 #ifdef DEBUG
    243  void validateRaw();
    244 #endif
    245 
         // Wraps a raw encoded value (the counterpart of rawData()). The value
         // is passed to validateRaw() only when DEBUG is defined; otherwise it is
         // taken as-is.
    246  static TaggedParserAtomIndex fromRaw(uint32_t data) {
    247    auto result = TaggedParserAtomIndex(data);
    248 #ifdef DEBUG
    249    result.validateRaw();
    250 #endif
    251    return result;
    252  }
    253 
         // Kind predicates. isParserAtomIndex() looks at the tag alone; the four
         // WellKnown flavours compare tag and sub-tag together.
    254  bool isParserAtomIndex() const {
    255    return (data_ & TagMask) == ParserAtomIndexTag;
    256  }
    257  bool isWellKnownAtomId() const {
    258    return (data_ & (TagMask | SubTagMask)) == (WellKnownTag | WellKnownSubTag);
    259  }
    260  bool isLength1StaticParserString() const {
    261    return (data_ & (TagMask | SubTagMask)) ==
    262           (WellKnownTag | Length1StaticSubTag);
    263  }
    264  bool isLength2StaticParserString() const {
    265    return (data_ & (TagMask | SubTagMask)) ==
    266           (WellKnownTag | Length2StaticSubTag);
    267  }
    268  bool isLength3StaticParserString() const {
    269    return (data_ & (TagMask | SubTagMask)) ==
    270           (WellKnownTag | Length3StaticSubTag);
    271  }
         // Null means the whole word is zero, not merely that the tag bits are
         // zero; the assertion cross-checks that a zero word carries NullTag.
    272  bool isNull() const {
    273    bool result = !data_;
    274    MOZ_ASSERT_IF(result, (data_ & TagMask) == NullTag);
    275    return result;
    276  }
         // Only declared here.
    277  HashNumber staticOrWellKnownHash() const;
    278 
         // Payload accessors: each asserts the matching kind, then masks the tag
         // (and sub-tag) bits off and converts the remainder to the payload type.
    279  ParserAtomIndex toParserAtomIndex() const {
    280    MOZ_ASSERT(isParserAtomIndex());
    281    return ParserAtomIndex(data_ & IndexMask);
    282  }
    283  WellKnownAtomId toWellKnownAtomId() const {
    284    MOZ_ASSERT(isWellKnownAtomId());
    285    return WellKnownAtomId(data_ & SmallIndexMask);
    286  }
    287  Length1StaticParserString toLength1StaticParserString() const {
    288    MOZ_ASSERT(isLength1StaticParserString());
    289    return Length1StaticParserString(data_ & SmallIndexMask);
    290  }
    291  Length2StaticParserString toLength2StaticParserString() const {
    292    MOZ_ASSERT(isLength2StaticParserString());
    293    return Length2StaticParserString(data_ & SmallIndexMask);
    294  }
    295  Length3StaticParserString toLength3StaticParserString() const {
    296    MOZ_ASSERT(isLength3StaticParserString());
    297    return Length3StaticParserString(data_ & SmallIndexMask);
    298  }
    299 
         // rawDataRef() hands out a pointer to the storage itself, so writes made
         // through it bypass the assertions in the constructors and in fromRaw().
    300  uint32_t* rawDataRef() { return &data_; }
    301  uint32_t rawData() const { return data_; }
    302 
         // Equality is equality of the encoded words.
    303  bool operator==(const TaggedParserAtomIndex& rhs) const {
    304    return data_ == rhs.data_;
    305  }
    306  bool operator!=(const TaggedParserAtomIndex& rhs) const {
    307    return data_ != rhs.data_;
    308  }
    309 
         // True for every non-null index.
    310  explicit operator bool() const { return !isNull(); }
    311 };
    312 
    313 // Trivial variant of TaggedParserAtomIndex, to use in collection that requires
    314 // trivial type.
    315 // Provides minimal set of methods to use in collection.
        //
        // No constructors are declared and data_ has no initializer, so a
        // default-initialized instance holds an indeterminate value; use from() or
        // null() to obtain a defined one.
    316 class TrivialTaggedParserAtomIndex {
    317  uint32_t data_;
    318 
    319 public:
         // Copies the encoded word of `index`.
    320  static TrivialTaggedParserAtomIndex from(TaggedParserAtomIndex index) {
    321    TrivialTaggedParserAtomIndex result;
    322    result.data_ = index.rawData();
    323    return result;
    324  }
    325 
         // Implicit conversion back to the full type. It goes through
         // TaggedParserAtomIndex::fromRaw(), which validates the word when DEBUG
         // is defined.
    326  operator TaggedParserAtomIndex() const {
    327    return TaggedParserAtomIndex::fromRaw(data_);
    328  }
    329 
         // The null value, written as a literal zero.
    330  static TrivialTaggedParserAtomIndex null() {
    331    TrivialTaggedParserAtomIndex result;
    332    result.data_ = 0;
    333    return result;
    334  }
    335 
         // Comparing against zero is only valid because NullTag is zero, which
         // the static_assert pins down at compile time.
    336  bool isNull() const {
    337    static_assert(TaggedParserAtomIndex::NullTag == 0);
    338    return data_ == 0;
    339  }
    340 
    341  uint32_t rawData() const { return data_; }
    342 
         // Equality is equality of the encoded words.
    343  bool operator==(const TrivialTaggedParserAtomIndex& rhs) const {
    344    return data_ == rhs.data_;
    345  }
    346  bool operator!=(const TrivialTaggedParserAtomIndex& rhs) const {
    347    return data_ != rhs.data_;
    348  }
    349 
         // True for every non-null value.
    350  explicit operator bool() const { return !isNull(); }
    351 };
    352 
    353 /**
    354 * A ParserAtom is an in-parser representation of an interned atomic
    355 * string.  It mostly mirrors the information carried by a JSAtom*.
    356 *
    357 * The atom contents are stored in one of two forms:
    358 *  1. Inline Latin1Char storage (immediately after the ParserAtom memory).
    359 *  2. Inline char16_t storage (immediately after the ParserAtom memory).
        *
        * The object itself is a fixed header (hash_, length_, flags_). Which of
        * the two forms follows it is recorded by HasTwoByteCharsFlag in flags_,
        * and the characters are reached through chars(), i.e. `this + 1`.
    360 */
    361 class alignas(alignof(uint32_t)) ParserAtom {
    362  friend class ParserAtomsTable;
    363  friend class WellKnownParserAtoms;
    364 
         // Upper bound asserted when narrowing a char16_t into a Latin1Char.
    365  static const uint16_t MAX_LATIN1_CHAR = 0xff;
    366 
    367  // Bit flags inside flags_.
    368  static constexpr uint32_t HasTwoByteCharsFlag = 1 << 0;
    369  static constexpr uint32_t UsedByStencilFlag = 1 << 1;
    370  static constexpr uint32_t AtomizeFlag = 1 << 2;
    371 
    372 public:
    373  // Whether to atomize the ParserAtom during instantiation.
    374  //
    375  // If this ParserAtom is used by opcode with JOF_ATOM, or used as a binding
    376  // in scope, it needs to be instantiated as JSAtom.
    377  // Otherwise, it needs to be instantiated as LinearString, to reduce the
    378  // cost of atomization.
         //
         // Atomize::Yes has the same value as AtomizeFlag, so an Atomize value
         // can be OR-ed straight into flags_ (see markUsedByStencil/markAtomize).
    379  enum class Atomize : uint32_t {
    380    No = 0,
    381    Yes = AtomizeFlag,
    382  };
    383 
    384 private:
    385  // Helper routine to read some sequence of two-byte chars, and write them
    386  // into a target buffer of a particular character width.
    387  //
    388  // The characters in the sequence must have been verified prior to the call:
         // fitting into a Latin1Char target and staying within `length` are only
         // asserted here. `length` is consulted solely by that bounds assertion;
         // the loop itself runs until seq.hasMore() returns false.
    389  template <typename CharT, typename SeqCharT>
    390  static void drainChar16Seq(CharT* buf, InflatedChar16Sequence<SeqCharT> seq,
    391                             uint32_t length) {
    392    static_assert(
    393        std::is_same_v<CharT, char16_t> || std::is_same_v<CharT, Latin1Char>,
    394        "Invalid target buffer type.");
    395    CharT* cur = buf;
    396    while (seq.hasMore()) {
    397      char16_t ch = seq.next();
    398      if constexpr (std::is_same_v<CharT, Latin1Char>) {
    399        MOZ_ASSERT(ch <= MAX_LATIN1_CHAR);
    400      }
    401      MOZ_ASSERT(cur < (buf + length));
    402      *cur = ch;
    403      cur++;
    404    }
    405  }
    406 
    407 private:
    408  // The JSAtom-compatible hash of the string.
    409  HashNumber hash_ = 0;
    410 
    411  // The length of the buffer in chars_.
         // NOTE(review): no member named chars_ is declared here; the characters
         // are the trailing storage returned by chars().
    412  uint32_t length_ = 0;
    413 
         // Combination of the *Flag bits declared at the top of the class.
    414  uint32_t flags_ = 0;
    415 
    416  // End of fields.
    417 
         // Records hash and length and sets only the two-byte bit; the
         // UsedByStencil and Atomize bits start out clear.
    418  ParserAtom(uint32_t length, HashNumber hash, bool hasTwoByteChars)
    419      : hash_(hash),
    420        length_(length),
    421        flags_(hasTwoByteChars ? HasTwoByteCharsFlag : 0) {}
    422 
    423 public:
    424  // The constexpr constructor is used by XDR
    425  constexpr ParserAtom() = default;
    426 
    427  // ParserAtoms may own their content buffers in variant_, and thus
    428  // cannot be copy-constructed - as a new chars would need to be allocated.
         // NOTE(review): no member named variant_ is declared in this class; the
         // characters trail the object in memory (see chars()), which a plain
         // copy or move of the header would not carry along.
    429  ParserAtom(const ParserAtom&) = delete;
    430  ParserAtom(ParserAtom&& other) = delete;
    431 
         // Only declared here.
    432  template <typename CharT, typename SeqCharT>
    433  static ParserAtom* allocate(FrontendContext* fc, LifoAlloc& alloc,
    434                              InflatedChar16Sequence<SeqCharT> seq,
    435                              uint32_t length, HashNumber hash);
    436 
         // Exactly one of these two is true for any atom.
    437  bool hasLatin1Chars() const { return !(flags_ & HasTwoByteCharsFlag); }
    438  bool hasTwoByteChars() const { return flags_ & HasTwoByteCharsFlag; }
    439 
         // True iff every character is ASCII. A two-byte atom is reported as
         // non-ASCII without its characters being scanned.
    440  bool isAscii() const {
    441    if (hasTwoByteChars()) {
    442      return false;
    443    }
    444    for (Latin1Char ch : latin1Range()) {
    445      if (!mozilla::IsAscii(ch)) {
    446        return false;
    447      }
    448    }
    449    return true;
    450  }
    451 
         // True iff the atom has at least two characters and the first is '#'.
    452  bool isPrivateName() const {
    453    if (length() < 2) {
    454      return false;
    455    }
    456 
    457    return charAt(0) == '#';
    458  }
    459 
    460  HashNumber hash() const { return hash_; }
    461  uint32_t length() const { return length_; }
    462 
    463  bool isUsedByStencil() const { return flags_ & UsedByStencilFlag; }
    464 
    465 private:
    466  bool isMarkedAtomize() const { return flags_ & AtomizeFlag; }
    467 
         // NOTE(review): not referenced in the visible part of this header;
         // presumably consulted by isInstantiatedAsJSAtom() — confirm.
    468  static constexpr uint32_t MinimumLengthForNonAtom = 8;
    469 
    470 public:
         // Only declared here.
    471  bool isInstantiatedAsJSAtom() const;
    472 
    473  template <typename CharT>
    474  bool equalsSeq(HashNumber hash, InflatedChar16Sequence<CharT> seq) const;
    475 
    476  // Convert NotInstantiated and usedByStencil entry to a js-atom.
    477  JSString* instantiateString(JSContext* cx, FrontendContext* fc,
    478                              ParserAtomIndex index,
    479                              CompilationAtomCache& atomCache) const;
    480  JSAtom* instantiateAtom(JSContext* cx, FrontendContext* fc,
    481                          ParserAtomIndex index,
    482                          CompilationAtomCache& atomCache) const;
    483  JSAtom* instantiatePermanentAtom(JSContext* cx, FrontendContext* fc,
    484                                   AtomSet& atomSet, ParserAtomIndex index,
    485                                   CompilationAtomCache& atomCache) const;
    486 
    487 private:
         // Both markers only OR bits into flags_; passing Atomize::No adds no
         // Atomize bit and leaves an already-set one in place.
    488  void markUsedByStencil(Atomize atomize) {
    489    flags_ |= UsedByStencilFlag | uint32_t(atomize);
    490  }
    491  void markAtomize(Atomize atomize) { flags_ |= uint32_t(atomize); }
    492 
         // Pointer to the character storage, taken as the memory immediately
         // after this object (this + 1). CharT must match the width recorded in
         // flags_, which is asserted.
    493  template <typename CharT>
    494  const CharT* chars() const {
    495    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
    496    return reinterpret_cast<const CharT*>(this + 1);
    497  }
    498 
    499  template <typename CharT>
    500  CharT* chars() {
    501    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
    502    return reinterpret_cast<CharT*>(this + 1);
    503  }
    504 
         // Width-specific views over the same storage, each length_ units long.
    505  const Latin1Char* latin1Chars() const { return chars<Latin1Char>(); }
    506  const char16_t* twoByteChars() const { return chars<char16_t>(); }
    507  mozilla::Range<const Latin1Char> latin1Range() const {
    508    return mozilla::Range(latin1Chars(), length_);
    509  }
    510  mozilla::Range<const char16_t> twoByteRange() const {
    511    return mozilla::Range(twoByteChars(), length_);
    512  }
    513 
    514  // Returns index-th char.
    515  // Boundary check isn't performed beyond the assertion below.
    516  char16_t charAt(size_t index) const {
    517    MOZ_ASSERT(index < length());
    518    if (hasLatin1Chars()) {
    519      return latin1Chars()[index];
    520    }
    521    return twoByteChars()[index];
    522  }
    523 
    524 public:
    525 #if defined(DEBUG) || defined(JS_JITSPEW)
    526  void dump() const;
    527  void dumpCharsNoQuote(js::GenericPrinter& out) const;
    528 #endif
    529 };
    530 
    531 /**
    532 * A lookup structure that allows for querying ParserAtoms in
    533 * a hashtable using a flexible input type that supports string
    534 * representations of various forms.
        *
        * This base class only stores the hash of the key. Comparing the key
        * against a table entry is left to subclasses through the two pure-virtual
        * equalsEntry() overloads, one per entry type.
    535 */
    536 class ParserAtomLookup {
    537 protected:
    538  HashNumber hash_;
    539 
         // Protected, and the class is abstract, so only subclasses construct it.
         // NOTE(review): the constructor is not `explicit` and no virtual
         // destructor is declared; confirm lookups are never destroyed through a
         // pointer to this base class.
    540  ParserAtomLookup(HashNumber hash) : hash_(hash) {}
    541 
    542 public:
    543  HashNumber hash() const { return hash_; }
    544 
         // Whether this key denotes the same string as the given entry.
    545  virtual bool equalsEntry(const ParserAtom* entry) const = 0;
    546  virtual bool equalsEntry(const WellKnownAtomInfo* info) const = 0;
    547 };
    548 
    549 struct ParserAtomLookupHasher {
         // Hash policy for tables keyed by `const ParserAtom*` (it is the hasher
         // argument of ParserAtomsTable::EntryMap) and queried with a
         // ParserAtomLookup.
    550  using Lookup = ParserAtomLookup;
    551 
         // The hash is the one already stored in the lookup; nothing is computed.
    552  static inline HashNumber hash(const Lookup& l) { return l.hash(); }
         // Matching is delegated to the lookup's virtual equalsEntry().
    553  static inline bool match(const ParserAtom* entry, const Lookup& l) {
    554    return l.equalsEntry(entry);
    555  }
    556 };
    557 
    558 struct WellKnownAtomInfoHasher {
         // Hash policy for tables keyed by `const WellKnownAtomInfo*` (it is the
         // hasher argument of WellKnownParserAtoms::EntryMap) and queried with a
         // ParserAtomLookup.
    559  using Lookup = ParserAtomLookup;
    560 
         // The hash is the one already stored in the lookup; nothing is computed.
    561  static inline HashNumber hash(const Lookup& l) { return l.hash(); }
         // Matching is delegated to the lookup's virtual equalsEntry().
    562  static inline bool match(const WellKnownAtomInfo* info, const Lookup& l) {
    563    return l.equalsEntry(info);
    564  }
    565 };
    566 
    567 using ParserAtomVector = Vector<ParserAtom*, 0, js::SystemAllocPolicy>;
    568 using ParserAtomSpan = mozilla::Span<ParserAtom*>;
    569 
    570 /**
    571 * WellKnownParserAtoms allows the parser to look up specific atoms in
    572 * constant time.
        *
        * Strings of length 0 to 3 are resolved arithmetically by
        * lookupTinyIndex(); the remaining well-known names are held in
        * wellKnownMap_.
    573 */
    574 class WellKnownParserAtoms {
         // The one static instance, handed out by getSingleton().
    575  static WellKnownParserAtoms singleton_;
    576 
    577  // Common property and prototype names are tracked in a hash table. This table
    578  // does not key for any items already in a direct-indexing tiny atom table.
    579  using EntryMap = HashMap<const WellKnownAtomInfo*, TaggedParserAtomIndex,
    580                           WellKnownAtomInfoHasher, js::SystemAllocPolicy>;
    581  EntryMap wellKnownMap_;
    582 
         // Only declared here.
    583  bool initSingle(const WellKnownAtomInfo& info, TaggedParserAtomIndex index);
    584 
    585  bool init();
    586  void free();
    587 
    588 public:
    589  static bool initSingleton();
    590  static void freeSingleton();
    591 
         // Asserts that the map is non-empty, i.e. that the table has already
         // been populated (presumably by initSingleton() — confirm).
    592  static WellKnownParserAtoms& getSingleton() {
    593    MOZ_ASSERT(!singleton_.wellKnownMap_.empty());
    594    return singleton_;
    595  }
    596 
    597  // Maximum length of any well known atoms. This can be increased if needed.
    598  static constexpr size_t MaxWellKnownLength = 32;
    599 
    600  template <typename CharT>
    601  TaggedParserAtomIndex lookupChar16Seq(
    602      const SpecificParserAtomLookup<CharT>& lookup) const;
    603 
         // Maps a string of length 0 to 3 to its static index without consulting
         // wellKnownMap_. Returns TaggedParserAtomIndex::null() when the string
         // has no static encoding, which includes every length above 3.
    604  template <typename CharsT>
    605  TaggedParserAtomIndex lookupTinyIndex(CharsT chars, size_t length) const {
    606    static_assert(std::is_same_v<CharsT, const Latin1Char*> ||
    607                      std::is_same_v<CharsT, const char16_t*> ||
    608                      std::is_same_v<CharsT, const char*> ||
    609                      std::is_same_v<CharsT, char16_t*>,
    610                  "This assert mostly explicitly documents the calling types, "
    611                  "and forces that to be updated if new types show up.");
    612    switch (length) {
             // The empty string is a well-known atom.
    613      case 0:
    614        return TaggedParserAtomIndex::WellKnown::empty();
    615 
             // One character: static when its code unit is below
             // Length1StaticLimit.
    616      case 1: {
    617        if (char16_t(chars[0]) < TaggedParserAtomIndex::Length1StaticLimit) {
    618          return TaggedParserAtomIndex(Length1StaticParserString(chars[0]));
    619        }
    620        break;
    621      }
    622 
             // Two characters: static only when both pass
             // StaticStrings::fitsInSmallChar().
    623      case 2:
    624        if (StaticStrings::fitsInSmallChar(chars[0]) &&
    625            StaticStrings::fitsInSmallChar(chars[1])) {
    626          return TaggedParserAtomIndex(Length2StaticParserString(
    627              StaticStrings::getLength2Index(chars[0], chars[1])));
    628        }
    629        break;
    630 
             // Three characters: static when fitsInLength3Static() accepts them.
             // `i` is passed as an out-parameter and read only on success
             // (presumably the callee writes it before returning true — confirm).
    631      case 3: {
    632        int i;
    633        if (StaticStrings::fitsInLength3Static(chars[0], chars[1], chars[2],
    634                                               &i)) {
    635          return TaggedParserAtomIndex(Length3StaticParserString(i));
    636        }
    637        break;
    638      }
    639    }
    640 
    641    // No match on tiny Atoms
    642    return TaggedParserAtomIndex::null();
    643  }
    644 
         // Only declared here.
    645  TaggedParserAtomIndex lookupTinyIndexUTF8(const mozilla::Utf8Unit* utf8Ptr,
    646                                            size_t nbyte) const;
    647 
         // Reports the map's shallow size only.
    648  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
    649    return wellKnownMap_.shallowSizeOfExcludingThis(mallocSizeOf);
    650  }
    651 };
    652 
    653 bool InstantiateMarkedAtoms(JSContext* cx, FrontendContext* fc,
    654                            const ParserAtomSpan& entries,
    655                            CompilationAtomCache& atomCache);
    656 
    657 bool InstantiateMarkedAtomsAsPermanent(JSContext* cx, FrontendContext* fc,
    658                                       AtomSet& atomSet,
    659                                       const ParserAtomSpan& entries,
    660                                       CompilationAtomCache& atomCache);
    661 
    662 /**
    663 * A ParserAtomsTable owns and manages the vector of ParserAtom entries
    664 * associated with a given compile session.
    665 */
    666 class ParserAtomsTable {
    667  friend struct CompilationStencil;
    668 
    669 private:
    670  LifoAlloc* alloc_;
    671 
    672  // The ParserAtom are owned by the LifoAlloc.
    673  using EntryMap = HashMap<const ParserAtom*, TaggedParserAtomIndex,
    674                           ParserAtomLookupHasher, js::SystemAllocPolicy>;
    675  EntryMap entryMap_;
    676  ParserAtomVector entries_;
    677 
    678 public:
    679  explicit ParserAtomsTable(LifoAlloc& alloc);
    680  ParserAtomsTable(ParserAtomsTable&&) = default;
    681  ParserAtomsTable& operator=(ParserAtomsTable&& other) noexcept {
    682    entryMap_ = std::move(other.entryMap_);
    683    entries_ = std::move(other.entries_);
    684    return *this;
    685  }
    686 
    687  void fixupAlloc(LifoAlloc& alloc) { alloc_ = &alloc; }
    688 
    689  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
    690    return entryMap_.shallowSizeOfExcludingThis(mallocSizeOf) +
    691           entries_.sizeOfExcludingThis(mallocSizeOf);
    692  }
    693  size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
    694    return mallocSizeOf(this) + sizeOfExcludingThis(mallocSizeOf);
    695  }
    696 
    697 private:
    698  // Internal APIs for interning to the table after well-known atoms cases have
    699  // been tested.
    700  TaggedParserAtomIndex addEntry(FrontendContext* fc, EntryMap::AddPtr& addPtr,
    701                                 ParserAtom* entry);
    702  template <typename AtomCharT, typename SeqCharT>
    703  TaggedParserAtomIndex internChar16Seq(FrontendContext* fc,
    704                                        EntryMap::AddPtr& addPtr,
    705                                        HashNumber hash,
    706                                        InflatedChar16Sequence<SeqCharT> seq,
    707                                        uint32_t length);
    708 
    709  template <typename AtomCharT>
    710  TaggedParserAtomIndex internExternalParserAtomImpl(FrontendContext* fc,
    711                                                     const ParserAtom* atom);
    712 
    713 public:
    714  TaggedParserAtomIndex internAscii(FrontendContext* fc, const char* asciiPtr,
    715                                    uint32_t length);
    716 
    717  TaggedParserAtomIndex internLatin1(FrontendContext* fc,
    718                                     const JS::Latin1Char* latin1Ptr,
    719                                     uint32_t length);
    720 
    721  TaggedParserAtomIndex internUtf8(FrontendContext* fc,
    722                                   const mozilla::Utf8Unit* utf8Ptr,
    723                                   uint32_t nbyte);
    724 
    725  TaggedParserAtomIndex internChar16(FrontendContext* fc,
    726                                     const char16_t* char16Ptr,
    727                                     uint32_t length);
    728 
    729  TaggedParserAtomIndex internJSAtom(FrontendContext* fc,
    730                                     CompilationAtomCache& atomCache,
    731                                     JSAtom* atom);
    732 
    733  // Intern ParserAtom data from other ParserAtomTable.
    734  // This copies flags as well.
    735  TaggedParserAtomIndex internExternalParserAtom(FrontendContext* fc,
    736                                                 const ParserAtom* atom);
    737 
    738  // The atomIndex given as argument is in relation with the context Stencil.
    739  // The atomIndex might be a well-known or static, in which case this function
    740  // is a no-op.
    741  TaggedParserAtomIndex internExternalParserAtomIndex(
    742      FrontendContext* fc, const CompilationStencil& context,
    743      TaggedParserAtomIndex atomIndex);
    744 
    745  // Compare an internal atom index with an external atom index coming from the
    746  // stencil given as argument.
    747  bool isEqualToExternalParserAtomIndex(TaggedParserAtomIndex internal,
    748                                        const CompilationStencil& context,
    749                                        TaggedParserAtomIndex external) const;
    750 
    751  bool addPlaceholder(FrontendContext* fc);
    752 
    753 private:
    754  const ParserAtom* getWellKnown(WellKnownAtomId atomId) const;
    755  ParserAtom* getParserAtom(ParserAtomIndex index) const;
    756 
    757 public:
    758  const ParserAtomVector& entries() const { return entries_; }
    759 
    760  // Accessors for querying atom properties.
         // Each one takes the TaggedParserAtomIndex of the atom to inspect and is
         // const.
         // NOTE(review): isIndex also takes the out-param |indexp|; presumably it
         // is only meaningful when the function returns true -- confirm.
    761  bool isIdentifier(TaggedParserAtomIndex index) const;
    762  bool isPrivateName(TaggedParserAtomIndex index) const;
    763  bool isExtendedUnclonedSelfHostedFunctionName(
    764      TaggedParserAtomIndex index) const;
    765  bool isModuleExportName(TaggedParserAtomIndex index) const;
    766  bool isIndex(TaggedParserAtomIndex index, uint32_t* indexp) const;
    767  bool isInstantiatedAsJSAtom(TaggedParserAtomIndex index) const;
    768  uint32_t length(TaggedParserAtomIndex index) const;
    769  HashNumber hash(TaggedParserAtomIndex index) const;
    770 
    771  // Operations on the atom identified by |index|.
         // All of them are const methods of the table, including the two mark*
         // functions.
         // NOTE(review): the to*String/toNewUTF8CharsZ functions return
         // UniqueChars; presumably a null result signals failure -- confirm.
    772  void markUsedByStencil(TaggedParserAtomIndex index,
    773                         ParserAtom::Atomize atomize) const;
    774  void markAtomize(TaggedParserAtomIndex index,
    775                   ParserAtom::Atomize atomize) const;
    776  double toNumber(TaggedParserAtomIndex index) const;
    777  UniqueChars toNewUTF8CharsZ(FrontendContext* fc,
    778                              TaggedParserAtomIndex index) const;
    779  UniqueChars toPrintableString(TaggedParserAtomIndex index) const;
    780  UniqueChars toQuotedString(TaggedParserAtomIndex index) const;
    781  JSAtom* toJSAtom(JSContext* cx, FrontendContext* fc,
    782                   TaggedParserAtomIndex index,
    783                   CompilationAtomCache& atomCache) const;
    784 
    785 private:
         // Private counterpart of toJSAtom that takes only |cx| and |index| (no
         // FrontendContext and no atom cache).
         // NOTE(review): presumably the path toJSAtom uses for well-known atom
         // indices -- confirm against the definition.
    786  JSAtom* toWellKnownJSAtom(JSContext* cx, TaggedParserAtomIndex index) const;
    787 
    788 public:
         // NOTE(review): presumably appends the characters of the atom at |index|
         // to |sb| and returns false on failure -- confirm against the definition.
    789  bool appendTo(StringBuilder& sb, TaggedParserAtomIndex index) const;
    790 
    791 public:
         // Debugging output helpers; only declared when DEBUG or JS_JITSPEW is
         // defined.
    792 #if defined(DEBUG) || defined(JS_JITSPEW)
    793  void dump(TaggedParserAtomIndex index) const;
    794  void dumpCharsNoQuote(js::GenericPrinter& out,
    795                        TaggedParserAtomIndex index) const;
    796
         // Static overloads that need no table instance: one for well-known atom
         // ids and one for each of the length-1/2/3 static parser string kinds.
    797  static void dumpCharsNoQuote(js::GenericPrinter& out, WellKnownAtomId id);
    798  static void dumpCharsNoQuote(js::GenericPrinter& out,
    799                               Length1StaticParserString index);
    800  static void dumpCharsNoQuote(js::GenericPrinter& out,
    801                               Length2StaticParserString index);
    802  static void dumpCharsNoQuote(js::GenericPrinter& out,
    803                               Length3StaticParserString index);
    804 #endif
    805 
    806  static void getLength1Content(Length1StaticParserString s,
    807                                Latin1Char contents[1]) {
    808    contents[0] = Latin1Char(s);
    809  }
    810 
    811  static void getLength2Content(Length2StaticParserString s, char contents[2]) {
    812    contents[0] = StaticStrings::firstCharOfLength2(size_t(s));
    813    contents[1] = StaticStrings::secondCharOfLength2(size_t(s));
    814  }
    815 
    816  static void getLength3Content(Length3StaticParserString s, char contents[3]) {
    817    contents[0] = StaticStrings::firstCharOfLength3(int32_t(s));
    818    contents[1] = StaticStrings::secondCharOfLength3(int32_t(s));
    819    contents[2] = StaticStrings::thirdCharOfLength3(int32_t(s));
    820  }
    821 };
    822 
    823 // Lightweight version of ParserAtomsTable.
    824 // This doesn't support deduplication.
    825 // Used while decoding XDR.
    826 class ParserAtomSpanBuilder {
    827  ParserAtomSpan& entries_;
    828 
    829 public:
    830  explicit ParserAtomSpanBuilder(ParserAtomSpan& entries) : entries_(entries) {}
    831 
    832  bool allocate(FrontendContext* fc, LifoAlloc& alloc, size_t count);
    833 
    834  void set(ParserAtomIndex index, const ParserAtom* atom) {
    835    entries_[index] = const_cast<ParserAtom*>(atom);
    836  }
    837 };
    838 
    839 template <typename CharT>
    840 class SpecificParserAtomLookup : public ParserAtomLookup {
    841  // The sequence of characters to look up.
    842  InflatedChar16Sequence<CharT> seq_;
    843 
    844 public:
    845  explicit SpecificParserAtomLookup(const InflatedChar16Sequence<CharT>& seq)
    846      : SpecificParserAtomLookup(seq, seq.computeHash()) {}
    847 
    848  SpecificParserAtomLookup(const InflatedChar16Sequence<CharT>& seq,
    849                           HashNumber hash)
    850      : ParserAtomLookup(hash), seq_(seq) {
    851    MOZ_ASSERT(seq_.computeHash() == hash);
    852  }
    853 
    854  virtual bool equalsEntry(const ParserAtom* entry) const override {
    855    return entry->equalsSeq<CharT>(hash_, seq_);
    856  }
    857 
    858  virtual bool equalsEntry(const WellKnownAtomInfo* info) const override {
    859    // Compare hashes first.
    860    if (info->hash != hash_) {
    861      return false;
    862    }
    863 
    864    InflatedChar16Sequence<CharT> seq = seq_;
    865    for (uint32_t i = 0; i < info->length; i++) {
    866      if (!seq.hasMore() || char16_t(info->content[i]) != seq.next()) {
    867        return false;
    868      }
    869    }
    870    return !seq.hasMore();
    871  }
    872 };
    873 
    874 template <typename CharT>
    875 inline bool ParserAtom::equalsSeq(HashNumber hash,
    876                                  InflatedChar16Sequence<CharT> seq) const {
    877  // Compare hashes first.
    878  if (hash_ != hash) {
    879    return false;
    880  }
    881 
    882  if (hasTwoByteChars()) {
    883    const char16_t* chars = twoByteChars();
    884    for (uint32_t i = 0; i < length_; i++) {
    885      if (!seq.hasMore() || chars[i] != seq.next()) {
    886        return false;
    887      }
    888    }
    889  } else {
    890    const Latin1Char* chars = latin1Chars();
    891    for (uint32_t i = 0; i < length_; i++) {
    892      if (!seq.hasMore() || char16_t(chars[i]) != seq.next()) {
    893        return false;
    894      }
    895    }
    896  }
    897  return !seq.hasMore();
    898 }
    899 
        // NOTE(review): only declared here; presumably returns the JSAtom that
        // corresponds to the well-known atom |atomId| -- confirm against the
        // definition.
    900 JSAtom* GetWellKnownAtom(JSContext* cx, WellKnownAtomId atomId);
    901 
    902 } /* namespace frontend */
    903 } /* namespace js */
    904 
    905 #endif  // frontend_ParserAtom_h