StaticStrings.h (8733B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef vm_StaticStrings_h 8 #define vm_StaticStrings_h 9 10 #include "mozilla/Assertions.h" // MOZ_ASSERT 11 #include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE 12 #include "mozilla/TextUtils.h" // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha} 13 14 #include <stddef.h> // size_t 15 #include <stdint.h> // int32_t, uint32_t 16 #include <type_traits> // std::is_same_v 17 18 #include "jstypes.h" // JS_PUBLIC_API, js::Bit, js::BitMask 19 20 #include "js/TypeDecls.h" // JS::Latin1Char 21 22 struct JS_PUBLIC_API JSContext; 23 24 class JSAtom; 25 class JSLinearString; 26 class JSString; 27 28 namespace js { 29 30 namespace frontend { 31 class ParserAtomsTable; 32 class TaggedParserAtomIndex; 33 class WellKnownParserAtoms; 34 struct CompilationAtomCache; 35 } // namespace frontend 36 37 namespace jit { 38 class MacroAssembler; 39 } // namespace jit 40 41 class StaticStrings { 42 // NOTE: The WellKnownParserAtoms rely on these tables and may need to be 43 // update if these tables are changed. 44 friend class js::frontend::ParserAtomsTable; 45 friend class js::frontend::TaggedParserAtomIndex; 46 friend class js::frontend::WellKnownParserAtoms; 47 friend struct js::frontend::CompilationAtomCache; 48 49 friend class js::jit::MacroAssembler; 50 51 private: 52 // Strings matches `[A-Za-z0-9$_]{2}` pattern. 53 // Store each character in 6 bits. 54 // See fromSmallChar/toSmallChar for the mapping. 55 static constexpr size_t SMALL_CHAR_BITS = 6; 56 static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS); 57 58 // To optimize ASCII -> small char, allocate a table. 59 static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U; 60 static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS); 61 static constexpr size_t NUM_LENGTH2_ENTRIES = 62 NUM_SMALL_CHARS * NUM_SMALL_CHARS; 63 64 public: 65 /* We keep these public for the JITs. */ 66 static const size_t UNIT_STATIC_LIMIT = 256U; 67 static const size_t INT_STATIC_LIMIT = 256U; 68 69 private: 70 JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {}; // zeroes 71 JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {}; // zeroes 72 JSAtom* intStaticTable[INT_STATIC_LIMIT] = {}; // zeroes 73 74 public: 75 StaticStrings() = default; 76 77 bool init(JSContext* cx); 78 79 static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; } 80 81 JSAtom* getUint(uint32_t u) { 82 MOZ_ASSERT(hasUint(u)); 83 return intStaticTable[u]; 84 } 85 86 static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; } 87 88 JSAtom* getInt(int32_t i) { 89 MOZ_ASSERT(hasInt(i)); 90 return getUint(uint32_t(i)); 91 } 92 93 static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; } 94 95 JSAtom* getUnit(char16_t c) { 96 MOZ_ASSERT(hasUnit(c)); 97 return unitStaticTable[c]; 98 } 99 100 /* May not return atom, returns null on (reported) failure. */ 101 inline JSLinearString* getUnitString(JSContext* cx, char16_t c); 102 103 /* May not return atom, returns null on (reported) failure. */ 104 inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str, 105 size_t index); 106 107 /* May not return atom, returns null on (reported) failure. */ 108 inline JSLinearString* getUnitStringForElement(JSContext* cx, 109 const JSLinearString* str, 110 size_t index); 111 112 template <typename CharT> 113 static bool isStatic(const CharT* chars, size_t len); 114 115 /* Return null if no static atom exists for the given (chars, length). */ 116 template <typename CharT> 117 MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) { 118 static_assert(std::is_same_v<CharT, JS::Latin1Char> || 119 std::is_same_v<CharT, char16_t>, 120 "for understandability, |chars| must be one of a few " 121 "identified types"); 122 123 switch (length) { 124 case 1: { 125 char16_t c = chars[0]; 126 if (c < UNIT_STATIC_LIMIT) { 127 return getUnit(c); 128 } 129 return nullptr; 130 } 131 case 2: 132 if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) { 133 return getLength2(chars[0], chars[1]); 134 } 135 return nullptr; 136 case 3: 137 /* 138 * Here we know that JSString::intStringTable covers only 256 (or at 139 * least not 1000 or more) chars. We rely on order here to resolve the 140 * unit vs. int string/length-2 string atom identity issue by giving 141 * priority to unit strings for "0" through "9" and length-2 strings for 142 * "10" through "99". 143 */ 144 int i; 145 if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) { 146 return getInt(i); 147 } 148 return nullptr; 149 } 150 151 return nullptr; 152 } 153 154 MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) { 155 // Collapse calls for |const char*| into |const Latin1Char char*| to avoid 156 // excess instantiations. 157 return lookup(reinterpret_cast<const JS::Latin1Char*>(chars), length); 158 } 159 160 private: 161 using SmallChar = uint8_t; 162 163 struct SmallCharTable { 164 SmallChar storage[SMALL_CHAR_TABLE_SIZE]; 165 166 constexpr SmallChar& operator[](size_t idx) { return storage[idx]; } 167 constexpr const SmallChar& operator[](size_t idx) const { 168 return storage[idx]; 169 } 170 }; 171 172 static const SmallChar INVALID_SMALL_CHAR = -1; 173 174 static bool fitsInSmallChar(char16_t c) { 175 return c < SMALL_CHAR_TABLE_SIZE && 176 toSmallCharTable[c] != INVALID_SMALL_CHAR; 177 } 178 179 template <typename CharT> 180 static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) { 181 static_assert(INT_STATIC_LIMIT <= 299, 182 "static int strings assumed below to be at most " 183 "three digits where the first digit is either 1 or 2"); 184 if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 && 185 c3 <= '9') { 186 *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0'); 187 188 if (unsigned(*i) < INT_STATIC_LIMIT) { 189 return true; 190 } 191 } 192 return false; 193 } 194 195 static constexpr JS::Latin1Char fromSmallChar(SmallChar c); 196 197 static constexpr SmallChar toSmallChar(uint32_t c); 198 199 static constexpr SmallCharTable createSmallCharTable(); 200 201 static const SmallCharTable toSmallCharTable; 202 203 static constexpr JS::Latin1Char firstCharOfLength2(size_t s) { 204 return fromSmallChar(s >> SMALL_CHAR_BITS); 205 } 206 static constexpr JS::Latin1Char secondCharOfLength2(size_t s) { 207 return fromSmallChar(s & SMALL_CHAR_MASK); 208 } 209 210 static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) { 211 return '0' + (i / 100); 212 } 213 static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) { 214 return '0' + ((i / 10) % 10); 215 } 216 static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) { 217 return '0' + (i % 10); 218 } 219 220 static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) { 221 MOZ_ASSERT(fitsInSmallChar(c1)); 222 MOZ_ASSERT(fitsInSmallChar(c2)); 223 return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) + 224 toSmallCharTable[c2]; 225 } 226 227 // Same as getLength2Index, but withtout runtime assertion, 228 // this should be used only for known static string. 229 static constexpr size_t getLength2IndexStatic(char c1, char c2) { 230 return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2); 231 } 232 233 MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) { 234 return length2StaticTable[index]; 235 } 236 237 MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) { 238 return getLength2FromIndex(getLength2Index(c1, c2)); 239 } 240 }; 241 242 /* 243 * Declare length-2 strings. We only store strings where both characters are 244 * alphanumeric. The lower 10 short chars are the numerals, the next 26 are 245 * the lowercase letters, and the next 26 are the uppercase letters. 246 */ 247 248 constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) { 249 if (c < 10) { 250 return c + '0'; 251 } 252 if (c < 36) { 253 return c + 'a' - 10; 254 } 255 if (c < 62) { 256 return c + 'A' - 36; 257 } 258 if (c == 62) { 259 return '$'; 260 } 261 return '_'; 262 } 263 264 constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) { 265 if (mozilla::IsAsciiDigit(c)) { 266 return c - '0'; 267 } 268 if (mozilla::IsAsciiLowercaseAlpha(c)) { 269 return c - 'a' + 10; 270 } 271 if (mozilla::IsAsciiUppercaseAlpha(c)) { 272 return c - 'A' + 36; 273 } 274 if (c == '$') { 275 return 62; 276 } 277 if (c == '_') { 278 return 63; 279 } 280 return StaticStrings::INVALID_SMALL_CHAR; 281 } 282 283 } // namespace js 284 285 #endif /* vm_StaticStrings_h */