SharedImmutableStringsCache.h (13750B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef vm_SharedImmutableStringsCache_h 8 #define vm_SharedImmutableStringsCache_h 9 10 #include "mozilla/Maybe.h" 11 #include "mozilla/UniquePtr.h" 12 13 #include <cstring> 14 #include <new> // for placement new 15 #include <utility> // std::move 16 17 #include "js/AllocPolicy.h" 18 #include "js/HashTable.h" 19 #include "js/UniquePtr.h" 20 #include "js/Utility.h" 21 22 #include "threading/ExclusiveData.h" 23 24 namespace js { 25 26 class SharedImmutableString; 27 class SharedImmutableTwoByteString; 28 29 /** 30 * The `SharedImmutableStringsCache` allows safely sharing and deduplicating 31 * immutable strings (either `const char*` [any encoding, not restricted to 32 * only Latin-1 or only UTF-8] or `const char16_t*`) between threads. 33 * 34 * The locking mechanism is dead-simple and coarse grained: a single lock guards 35 * all of the internal table itself, the table's entries, and the entries' 36 * reference counts. It is only safe to perform any mutation on the cache or any 37 * data stored within the cache when this lock is acquired. 38 */ 39 class SharedImmutableStringsCache { 40 static SharedImmutableStringsCache singleton_; 41 42 friend class SharedImmutableString; 43 friend class SharedImmutableTwoByteString; 44 struct Hasher; 45 46 public: 47 using OwnedChars = JS::UniqueChars; 48 using OwnedTwoByteChars = JS::UniqueTwoByteChars; 49 50 /** 51 * Get the canonical, shared, and de-duplicated version of the given `const 52 * char*` string. If such a string does not exist, call `intoOwnedChars` and 53 * add the string it returns to the cache. 54 * 55 * `intoOwnedChars` must create an owned version of the given string, and 56 * must have one of the following types: 57 * 58 * JS::UniqueChars intoOwnedChars(); 59 * JS::UniqueChars&& intoOwnedChars(); 60 * 61 * It can be used by callers to elide a copy of the string when it is safe 62 * to give up ownership of the lookup string to the cache. It must return a 63 * `nullptr` on failure. 64 * 65 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 66 * returned. 67 */ 68 template <typename IntoOwnedChars> 69 [[nodiscard]] SharedImmutableString getOrCreate( 70 const char* chars, size_t length, IntoOwnedChars intoOwnedChars); 71 72 /** 73 * Take ownership of the given `chars` and return the canonical, shared and 74 * de-duplicated version. 75 * 76 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 77 * returned. 78 */ 79 [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars, 80 size_t length); 81 82 /** 83 * Do not take ownership of the given `chars`. Return the canonical, shared 84 * and de-duplicated version. If there is no extant shared version of 85 * `chars`, make a copy and insert it into the cache. 86 * 87 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 88 * returned. 89 */ 90 [[nodiscard]] SharedImmutableString getOrCreate(const char* chars, 91 size_t length); 92 93 /** 94 * Get the canonical, shared, and de-duplicated version of the given `const 95 * char16_t*` string. If such a string does not exist, call `intoOwnedChars` 96 * and add the string it returns to the cache. 97 * 98 * `intoOwnedTwoByteChars` must create an owned version of the given string, 99 * and must have one of the following types: 100 * 101 * JS::UniqueTwoByteChars intoOwnedTwoByteChars(); 102 * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars(); 103 * 104 * It can be used by callers to elide a copy of the string when it is safe 105 * to give up ownership of the lookup string to the cache. It must return a 106 * `nullptr` on failure. 107 * 108 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 109 * returned. 110 */ 111 template <typename IntoOwnedTwoByteChars> 112 [[nodiscard]] SharedImmutableTwoByteString getOrCreate( 113 const char16_t* chars, size_t length, 114 IntoOwnedTwoByteChars intoOwnedTwoByteChars); 115 116 /** 117 * Take ownership of the given `chars` and return the canonical, shared and 118 * de-duplicated version. 119 * 120 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 121 * returned. 122 */ 123 [[nodiscard]] SharedImmutableTwoByteString getOrCreate( 124 OwnedTwoByteChars&& chars, size_t length); 125 126 /** 127 * Do not take ownership of the given `chars`. Return the canonical, shared 128 * and de-duplicated version. If there is no extant shared version of 129 * `chars`, then make a copy and insert it into the cache. 130 * 131 * On success, `Some` is returned. In the case of OOM failure, `Nothing` is 132 * returned. 133 */ 134 [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars, 135 size_t length); 136 137 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const { 138 MOZ_ASSERT(inner_); 139 size_t n = mallocSizeOf(inner_); 140 141 auto locked = inner_->lock(); 142 143 // Size of the table. 144 n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf); 145 146 // Sizes of the strings and their boxes. 147 for (auto r = locked->set.all(); !r.empty(); r.popFront()) { 148 n += mallocSizeOf(r.front().get()); 149 if (const char* chars = r.front()->chars()) { 150 n += mallocSizeOf(chars); 151 } 152 } 153 154 return n; 155 } 156 157 private: 158 bool init(); 159 void free(); 160 161 public: 162 static bool initSingleton(); 163 static void freeSingleton(); 164 165 static SharedImmutableStringsCache& getSingleton() { 166 MOZ_ASSERT(singleton_.inner_); 167 return singleton_; 168 } 169 170 private: 171 SharedImmutableStringsCache() = default; 172 ~SharedImmutableStringsCache() = default; 173 174 public: 175 SharedImmutableStringsCache(const SharedImmutableStringsCache& rhs) = delete; 176 SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) = delete; 177 178 SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) = 179 delete; 180 181 SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = 182 delete; 183 184 /** 185 * Purge the cache of all refcount == 0 entries. 186 */ 187 void purge() { 188 auto locked = inner_->lock(); 189 190 for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) { 191 if (e.front()->refcount == 0) { 192 // The chars should be eagerly freed when refcount reaches zero. 193 MOZ_ASSERT(!e.front()->chars()); 194 e.removeFront(); 195 } else { 196 // The chars should exist as long as the refcount is non-zero. 197 MOZ_ASSERT(e.front()->chars()); 198 } 199 } 200 } 201 202 private: 203 struct Inner; 204 class StringBox { 205 friend class SharedImmutableString; 206 207 OwnedChars chars_; 208 size_t length_; 209 const ExclusiveData<Inner>* cache_; 210 211 public: 212 mutable size_t refcount; 213 214 using Ptr = js::UniquePtr<StringBox>; 215 216 StringBox(OwnedChars&& chars, size_t length, 217 const ExclusiveData<Inner>* cache) 218 : chars_(std::move(chars)), 219 length_(length), 220 cache_(cache), 221 refcount(0) { 222 MOZ_ASSERT(chars_); 223 } 224 225 static Ptr Create(OwnedChars&& chars, size_t length, 226 const ExclusiveData<Inner>* cache) { 227 return js::MakeUnique<StringBox>(std::move(chars), length, cache); 228 } 229 230 StringBox(const StringBox&) = delete; 231 StringBox& operator=(const StringBox&) = delete; 232 233 ~StringBox() { 234 MOZ_RELEASE_ASSERT( 235 refcount == 0, 236 "There are `SharedImmutable[TwoByte]String`s outliving their " 237 "associated cache! This always leads to use-after-free in the " 238 "`~SharedImmutableString` destructor!"); 239 } 240 241 const char* chars() const { return chars_.get(); } 242 size_t length() const { return length_; } 243 }; 244 245 struct Hasher { 246 /** 247 * A structure used when querying for a `const char*` string in the cache. 248 */ 249 class Lookup { 250 friend struct Hasher; 251 252 HashNumber hash_; 253 const char* chars_; 254 size_t length_; 255 256 public: 257 Lookup(HashNumber hash, const char* chars, size_t length) 258 : hash_(hash), chars_(chars), length_(length) { 259 MOZ_ASSERT(chars_); 260 MOZ_ASSERT(hash == Hasher::hashLongString(chars, length)); 261 } 262 263 Lookup(HashNumber hash, const char16_t* chars, size_t length) 264 : Lookup(hash, reinterpret_cast<const char*>(chars), 265 length * sizeof(char16_t)) {} 266 }; 267 268 static const size_t SHORT_STRING_MAX_LENGTH = 8192; 269 static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2; 270 271 // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the 272 // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the 273 // string. This increases the risk of collisions, but in practice it 274 // should be rare, and it yields a large speedup for hashing long 275 // strings. 276 static HashNumber hashLongString(const char* chars, size_t length) { 277 MOZ_ASSERT(chars); 278 return length <= SHORT_STRING_MAX_LENGTH 279 ? mozilla::HashString(chars, length) 280 : mozilla::AddToHash( 281 mozilla::HashString(chars, HASH_CHUNK_LENGTH), 282 mozilla::HashString(chars + length - HASH_CHUNK_LENGTH, 283 HASH_CHUNK_LENGTH)); 284 } 285 286 static HashNumber hash(const Lookup& lookup) { return lookup.hash_; } 287 288 static bool match(const StringBox::Ptr& key, const Lookup& lookup) { 289 MOZ_ASSERT(lookup.chars_); 290 291 if (!key->chars() || key->length() != lookup.length_) { 292 return false; 293 } 294 295 if (key->chars() == lookup.chars_) { 296 return true; 297 } 298 299 return memcmp(key->chars(), lookup.chars_, key->length()) == 0; 300 } 301 }; 302 303 // The `Inner` struct contains the actual cached contents and shared between 304 // the `SharedImmutableStringsCache` singleton and all 305 // `SharedImmutable[TwoByte]String` holders. 306 struct Inner { 307 using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>; 308 309 Set set; 310 311 Inner() = default; 312 313 Inner(const Inner&) = delete; 314 Inner& operator=(const Inner&) = delete; 315 }; 316 317 const ExclusiveData<Inner>* inner_ = nullptr; 318 }; 319 320 /** 321 * The `SharedImmutableString` class holds a reference to a `const char*` string 322 * from the `SharedImmutableStringsCache` and releases the reference upon 323 * destruction. 324 */ 325 class SharedImmutableString { 326 friend class SharedImmutableStringsCache; 327 friend class SharedImmutableTwoByteString; 328 329 mutable SharedImmutableStringsCache::StringBox* box_; 330 331 explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box); 332 333 public: 334 /** 335 * `SharedImmutableString`s are move-able. It is an error to use a 336 * `SharedImmutableString` after it has been moved. 337 */ 338 SharedImmutableString(SharedImmutableString&& rhs); 339 SharedImmutableString& operator=(SharedImmutableString&& rhs); 340 SharedImmutableString() { box_ = nullptr; } 341 342 /** 343 * Create another shared reference to the underlying string. 344 */ 345 SharedImmutableString clone() const; 346 347 // If you want a copy, take one explicitly with `clone`! 348 SharedImmutableString& operator=(const SharedImmutableString&) = delete; 349 explicit operator bool() const { return box_ != nullptr; } 350 351 ~SharedImmutableString(); 352 353 /** 354 * Get a raw pointer to the underlying string. It is only safe to use the 355 * resulting pointer while this `SharedImmutableString` exists. 356 */ 357 const char* chars() const { 358 MOZ_ASSERT(box_); 359 MOZ_ASSERT(box_->refcount > 0); 360 MOZ_ASSERT(box_->chars()); 361 return box_->chars(); 362 } 363 364 /** 365 * Get the length of the underlying string. 366 */ 367 size_t length() const { 368 MOZ_ASSERT(box_); 369 MOZ_ASSERT(box_->refcount > 0); 370 MOZ_ASSERT(box_->chars()); 371 return box_->length(); 372 } 373 }; 374 375 /** 376 * The `SharedImmutableTwoByteString` class holds a reference to a `const 377 * char16_t*` string from the `SharedImmutableStringsCache` and releases the 378 * reference upon destruction. 379 */ 380 class SharedImmutableTwoByteString { 381 friend class SharedImmutableStringsCache; 382 383 // If a `char*` string and `char16_t*` string happen to have the same bytes, 384 // the bytes will be shared but handed out as different types. 385 SharedImmutableString string_; 386 387 explicit SharedImmutableTwoByteString(SharedImmutableString&& string); 388 explicit SharedImmutableTwoByteString( 389 SharedImmutableStringsCache::StringBox* box); 390 391 public: 392 /** 393 * `SharedImmutableTwoByteString`s are move-able. It is an error to use a 394 * `SharedImmutableTwoByteString` after it has been moved. 395 */ 396 SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs); 397 SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs); 398 SharedImmutableTwoByteString() { string_.box_ = nullptr; } 399 400 /** 401 * Create another shared reference to the underlying string. 402 */ 403 SharedImmutableTwoByteString clone() const; 404 405 // If you want a copy, take one explicitly with `clone`! 406 SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = 407 delete; 408 explicit operator bool() const { return string_.box_ != nullptr; } 409 /** 410 * Get a raw pointer to the underlying string. It is only safe to use the 411 * resulting pointer while this `SharedImmutableTwoByteString` exists. 412 */ 413 const char16_t* chars() const { 414 return reinterpret_cast<const char16_t*>(string_.chars()); 415 } 416 417 /** 418 * Get the length of the underlying string. 419 */ 420 size_t length() const { return string_.length() / sizeof(char16_t); } 421 }; 422 423 } // namespace js 424 425 #endif // vm_SharedImmutableStringsCache_h