tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

StaticStrings.h (8733B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef vm_StaticStrings_h
      8 #define vm_StaticStrings_h
      9 
     10 #include "mozilla/Assertions.h"  // MOZ_ASSERT
     11 #include "mozilla/Attributes.h"  // MOZ_ALWAYS_INLINE
     12 #include "mozilla/TextUtils.h"  // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha}
     13 
     14 #include <stddef.h>     // size_t
     15 #include <stdint.h>     // int32_t, uint32_t
     16 #include <type_traits>  // std::is_same_v
     17 
     18 #include "jstypes.h"  // JS_PUBLIC_API, js::Bit, js::BitMask
     19 
     20 #include "js/TypeDecls.h"  // JS::Latin1Char
     21 
     22 struct JS_PUBLIC_API JSContext;
     23 
     24 class JSAtom;
     25 class JSLinearString;
     26 class JSString;
     27 
     28 namespace js {
     29 
     30 namespace frontend {
     31 class ParserAtomsTable;
     32 class TaggedParserAtomIndex;
     33 class WellKnownParserAtoms;
     34 struct CompilationAtomCache;
     35 }  // namespace frontend
     36 
     37 namespace jit {
     38 class MacroAssembler;
     39 }  // namespace jit
     40 
     41 class StaticStrings {
         // Holds three JSAtom* lookup tables: one for single code units below
         // UNIT_STATIC_LIMIT, one for two-character strings over [A-Za-z0-9$_],
         // and one for integers below INT_STATIC_LIMIT. lookup() maps a character
         // buffer of length 1 to 3 onto these tables.
         //
     42  // NOTE: WellKnownParserAtoms relies on these tables and may need to be
     43  //       updated if these tables are changed.
     44  friend class js::frontend::ParserAtomsTable;
     45  friend class js::frontend::TaggedParserAtomIndex;
     46  friend class js::frontend::WellKnownParserAtoms;
     47  friend struct js::frontend::CompilationAtomCache;
     48 
     49  friend class js::jit::MacroAssembler;
     50 
     51 private:
     52  // Length-2 strings match the `[A-Za-z0-9$_]{2}` pattern.
     53  // Each character is stored in 6 bits.
     54  // See fromSmallChar/toSmallChar for the mapping.
     55  static constexpr size_t SMALL_CHAR_BITS = 6;
     56  static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS);
     57 
     58  // To optimize ASCII -> small char, use a table indexed by ASCII code point.
     59  static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U;
     60  static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS);
     61  static constexpr size_t NUM_LENGTH2_ENTRIES =
     62      NUM_SMALL_CHARS * NUM_SMALL_CHARS;
     63 
     64 public:
     65  /* We keep these public for the JITs. */
     66  static const size_t UNIT_STATIC_LIMIT = 256U;
     67  static const size_t INT_STATIC_LIMIT = 256U;
     68 
     69 private:
     70  JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {};  // zeroes
     71  JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {};       // zeroes
     72  JSAtom* intStaticTable[INT_STATIC_LIMIT] = {};         // zeroes
     73 
     74 public:
     75  StaticStrings() = default;
     76 
     77  bool init(JSContext* cx);
     78 
         // True if |u| is a valid index into intStaticTable.
     79  static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
     80 
         // Returns the intStaticTable entry for |u|; |u| must satisfy hasUint().
     81  JSAtom* getUint(uint32_t u) {
     82    MOZ_ASSERT(hasUint(u));
     83    return intStaticTable[u];
     84  }
     85 
         // Signed variant of hasUint(). The cast to uint32_t turns negative values
         // into large unsigned ones, so they fail the comparison as well.
     86  static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; }
     87 
         // Returns the intStaticTable entry for |i|; |i| must satisfy hasInt().
     88  JSAtom* getInt(int32_t i) {
     89    MOZ_ASSERT(hasInt(i));
     90    return getUint(uint32_t(i));
     91  }
     92 
         // True if |c| is a valid index into unitStaticTable.
     93  static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
     94 
         // Returns the unitStaticTable entry for |c|; |c| must satisfy hasUnit().
     95  JSAtom* getUnit(char16_t c) {
     96    MOZ_ASSERT(hasUnit(c));
     97    return unitStaticTable[c];
     98  }
     99 
    100  /* May return a non-atom string; returns null on (reported) failure. */
    101  inline JSLinearString* getUnitString(JSContext* cx, char16_t c);
    102 
    103  /* May return a non-atom string; returns null on (reported) failure. */
    104  inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str,
    105                                                 size_t index);
    106 
    107  /* May return a non-atom string; returns null on (reported) failure. */
    108  inline JSLinearString* getUnitStringForElement(JSContext* cx,
    109                                                 const JSLinearString* str,
    110                                                 size_t index);
    111 
    112  template <typename CharT>
    113  static bool isStatic(const CharT* chars, size_t len);
    114 
    115  /* Return null if no static atom exists for the given (chars, length). */
    116  template <typename CharT>
    117  MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) {
    118    static_assert(std::is_same_v<CharT, JS::Latin1Char> ||
    119                      std::is_same_v<CharT, char16_t>,
    120                  "for understandability, |chars| must be one of a few "
    121                  "identified types");
    122 
           // Only lengths 1, 2 and 3 can match a table; everything else falls
           // through to the final |return nullptr|.
    123    switch (length) {
    124      case 1: {
    125        char16_t c = chars[0];
    126        if (c < UNIT_STATIC_LIMIT) {
    127          return getUnit(c);
    128        }
    129        return nullptr;
    130      }
    131      case 2:
    132        if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) {
    133          return getLength2(chars[0], chars[1]);
    134        }
    135        return nullptr;
    136      case 3:
    137        /*
    138         * Here we know that intStaticTable covers only 256 (or at least not
    139         * 1000 or more) integers. We rely on order here to resolve the unit
    140         * vs. int string/length-2 string atom identity issue by giving
    141         * priority to unit strings for "0" through "9" and length-2 strings for
    142         * "10" through "99".
    143         */
    144        int i;
    145        if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) {
    146          return getInt(i);
    147        }
    148        return nullptr;
    149    }
    150 
    151    return nullptr;
    152  }
    153 
    154  MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) {
    155    // Collapse calls for |const char*| into |const Latin1Char*| to avoid
    156    // excess instantiations.
    157    return lookup(reinterpret_cast<const JS::Latin1Char*>(chars), length);
    158  }
    159 
    160 private:
         // A character encoded in SMALL_CHAR_BITS bits, or INVALID_SMALL_CHAR.
    161  using SmallChar = uint8_t;
    162 
         // Fixed-size array wrapper with constexpr element access, holding one
         // SmallChar per ASCII code point.
    163  struct SmallCharTable {
    164    SmallChar storage[SMALL_CHAR_TABLE_SIZE];
    165 
    166    constexpr SmallChar& operator[](size_t idx) { return storage[idx]; }
    167    constexpr const SmallChar& operator[](size_t idx) const {
    168      return storage[idx];
    169    }
    170  };
    171 
         // -1 converts to 0xFF because SmallChar is uint8_t.
    172  static const SmallChar INVALID_SMALL_CHAR = -1;
    173 
         // True if |c| is ASCII and has a valid entry in toSmallCharTable. The
         // range check comes first so the table is never indexed out of bounds.
    174  static bool fitsInSmallChar(char16_t c) {
    175    return c < SMALL_CHAR_TABLE_SIZE &&
    176           toSmallCharTable[c] != INVALID_SMALL_CHAR;
    177  }
    178 
         // Returns true if (c1, c2, c3) are three ASCII digits whose first digit
         // is '1' or '2' and whose decimal value is below INT_STATIC_LIMIT; the
         // value is stored in |*i|. Note that |*i| is also written when all three
         // characters are accepted but the value is not below the limit.
    179  template <typename CharT>
    180  static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) {
    181    static_assert(INT_STATIC_LIMIT <= 299,
    182                  "static int strings assumed below to be at most "
    183                  "three digits where the first digit is either 1 or 2");
    184    if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 &&
    185        c3 <= '9') {
    186      *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0');
    187 
    188      if (unsigned(*i) < INT_STATIC_LIMIT) {
    189        return true;
    190      }
    191    }
    192    return false;
    193  }
    194 
    195  static constexpr JS::Latin1Char fromSmallChar(SmallChar c);
    196 
    197  static constexpr SmallChar toSmallChar(uint32_t c);
    198 
    199  static constexpr SmallCharTable createSmallCharTable();
    200 
    201  static const SmallCharTable toSmallCharTable;
    202 
         // A length-2 index packs the first small char in the bits above
         // SMALL_CHAR_BITS and the second small char in the low SMALL_CHAR_BITS.
    203  static constexpr JS::Latin1Char firstCharOfLength2(size_t s) {
    204    return fromSmallChar(s >> SMALL_CHAR_BITS);
    205  }
    206  static constexpr JS::Latin1Char secondCharOfLength2(size_t s) {
    207    return fromSmallChar(s & SMALL_CHAR_MASK);
    208  }
    209 
         // ASCII digit characters for the hundreds, tens and ones places of |i|.
         // firstCharOfLength3 only yields a digit when |i| is below 1000.
    210  static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) {
    211    return '0' + (i / 100);
    212  }
    213  static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) {
    214    return '0' + ((i / 10) % 10);
    215  }
    216  static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) {
    217    return '0' + (i % 10);
    218  }
    219 
         // Index into length2StaticTable for the two-character string (c1, c2).
         // Both characters must satisfy fitsInSmallChar().
    220  static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) {
    221    MOZ_ASSERT(fitsInSmallChar(c1));
    222    MOZ_ASSERT(fitsInSmallChar(c2));
    223    return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) +
    224           toSmallCharTable[c2];
    225  }
    226 
    227  // Same as getLength2Index, but without the runtime assertions and using
    228  // toSmallChar instead of the table; use only for known static strings.
    229  static constexpr size_t getLength2IndexStatic(char c1, char c2) {
    230    return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2);
    231  }
    232 
         // Returns the length2StaticTable entry at |index|; no range check is
         // performed here.
    233  MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) {
    234    return length2StaticTable[index];
    235  }
    236 
         // Returns the length2StaticTable entry for the string (c1, c2).
    237  MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) {
    238    return getLength2FromIndex(getLength2Index(c1, c2));
    239  }
    240 };
    241 
    242 /*
    243 * Declare length-2 strings. We only store strings where both characters match
    244 * [A-Za-z0-9$_]. Small chars 0-9 are the numerals, 10-35 the lowercase
    245 * letters, 36-61 the uppercase letters, 62 is '$' and 63 is '_'.
    246 */
    247 
    248 constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
         // Maps a small char back to its character: 0-9 -> '0'-'9',
         // 10-35 -> 'a'-'z', 36-61 -> 'A'-'Z', 62 -> '$'.
    249  if (c < 10) {
    250    return c + '0';
    251  }
    252  if (c < 36) {
    253    return c + 'a' - 10;
    254  }
    255  if (c < 62) {
    256    return c + 'A' - 36;
    257  }
    258  if (c == 62) {
    259    return '$';
    260  }
         // Every remaining value (63 and anything above it) yields '_'; there is
         // no range check on |c|.
    261  return '_';
    262 }
    263 
    264 constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
         // Maps a character to its small char: digits -> 0-9, lowercase letters
         // -> 10-35, uppercase letters -> 36-61, '$' -> 62, '_' -> 63.
    265  if (mozilla::IsAsciiDigit(c)) {
    266    return c - '0';
    267  }
    268  if (mozilla::IsAsciiLowercaseAlpha(c)) {
    269    return c - 'a' + 10;
    270  }
    271  if (mozilla::IsAsciiUppercaseAlpha(c)) {
    272    return c - 'A' + 36;
    273  }
    274  if (c == '$') {
    275    return 62;
    276  }
    277  if (c == '_') {
    278    return 63;
    279  }
         // Any other input has no small-char encoding.
    280  return StaticStrings::INVALID_SMALL_CHAR;
    281 }
    282 
    283 }  // namespace js
    284 
    285 #endif /* vm_StaticStrings_h */