tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SharedImmutableStringsCache.h (13750B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef vm_SharedImmutableStringsCache_h
      8 #define vm_SharedImmutableStringsCache_h
      9 
     10 #include "mozilla/Maybe.h"
     11 #include "mozilla/UniquePtr.h"
     12 
     13 #include <cstring>
     14 #include <new>      // for placement new
     15 #include <utility>  // std::move
     16 
     17 #include "js/AllocPolicy.h"
     18 #include "js/HashTable.h"
     19 #include "js/UniquePtr.h"
     20 #include "js/Utility.h"
     21 
     22 #include "threading/ExclusiveData.h"
     23 
     24 namespace js {
     25 
     26 class SharedImmutableString;
     27 class SharedImmutableTwoByteString;
     28 
     29 /**
     30 * The `SharedImmutableStringsCache` allows safely sharing and deduplicating
     31 * immutable strings (either `const char*` [any encoding, not restricted to
     32 * only Latin-1 or only UTF-8] or `const char16_t*`) between threads.
     33 *
     34 * The locking mechanism is dead-simple and coarse grained: a single lock guards
     35 * all of the internal table itself, the table's entries, and the entries'
     36 * reference counts. It is only safe to perform any mutation on the cache or any
     37 * data stored within the cache when this lock is acquired.
     38 */
     39 class SharedImmutableStringsCache {
     40  static SharedImmutableStringsCache singleton_;
     41 
     42  friend class SharedImmutableString;
     43  friend class SharedImmutableTwoByteString;
     44  struct Hasher;
     45 
     46 public:
     47  using OwnedChars = JS::UniqueChars;
     48  using OwnedTwoByteChars = JS::UniqueTwoByteChars;
     49 
     50  /**
     51   * Get the canonical, shared, and de-duplicated version of the given `const
     52   * char*` string. If such a string does not exist, call `intoOwnedChars` and
     53   * add the string it returns to the cache.
     54   *
     55   * `intoOwnedChars` must create an owned version of the given string, and
     56   * must have one of the following types:
     57   *
     58   *     JS::UniqueChars   intoOwnedChars();
     59   *     JS::UniqueChars&& intoOwnedChars();
     60   *
     61   * It can be used by callers to elide a copy of the string when it is safe
     62   * to give up ownership of the lookup string to the cache. It must return a
     63   * `nullptr` on failure.
     64   *
     65   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
     66   * returned.
     67   */
     68  template <typename IntoOwnedChars>
     69  [[nodiscard]] SharedImmutableString getOrCreate(
     70      const char* chars, size_t length, IntoOwnedChars intoOwnedChars);
     71 
     72  /**
     73   * Take ownership of the given `chars` and return the canonical, shared and
     74   * de-duplicated version.
     75   *
     76   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
     77   * returned.
     78   */
     79  [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars,
     80                                                  size_t length);
     81 
     82  /**
     83   * Do not take ownership of the given `chars`. Return the canonical, shared
     84   * and de-duplicated version. If there is no extant shared version of
     85   * `chars`, make a copy and insert it into the cache.
     86   *
     87   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
     88   * returned.
     89   */
     90  [[nodiscard]] SharedImmutableString getOrCreate(const char* chars,
     91                                                  size_t length);
     92 
     93  /**
     94   * Get the canonical, shared, and de-duplicated version of the given `const
     95   * char16_t*` string. If such a string does not exist, call `intoOwnedChars`
     96   * and add the string it returns to the cache.
     97   *
     98   * `intoOwnedTwoByteChars` must create an owned version of the given string,
     99   * and must have one of the following types:
    100   *
    101   *     JS::UniqueTwoByteChars   intoOwnedTwoByteChars();
    102   *     JS::UniqueTwoByteChars&& intoOwnedTwoByteChars();
    103   *
    104   * It can be used by callers to elide a copy of the string when it is safe
    105   * to give up ownership of the lookup string to the cache. It must return a
    106   * `nullptr` on failure.
    107   *
    108   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
    109   * returned.
    110   */
    111  template <typename IntoOwnedTwoByteChars>
    112  [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
    113      const char16_t* chars, size_t length,
    114      IntoOwnedTwoByteChars intoOwnedTwoByteChars);
    115 
    116  /**
    117   * Take ownership of the given `chars` and return the canonical, shared and
    118   * de-duplicated version.
    119   *
    120   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
    121   * returned.
    122   */
    123  [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
    124      OwnedTwoByteChars&& chars, size_t length);
    125 
    126  /**
    127   * Do not take ownership of the given `chars`. Return the canonical, shared
    128   * and de-duplicated version. If there is no extant shared version of
    129   * `chars`, then make a copy and insert it into the cache.
    130   *
    131   * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
    132   * returned.
    133   */
    134  [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars,
    135                                                         size_t length);
    136 
    137  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
    138    MOZ_ASSERT(inner_);
    139    size_t n = mallocSizeOf(inner_);
    140 
    141    auto locked = inner_->lock();
    142 
    143    // Size of the table.
    144    n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf);
    145 
    146    // Sizes of the strings and their boxes.
    147    for (auto r = locked->set.all(); !r.empty(); r.popFront()) {
    148      n += mallocSizeOf(r.front().get());
    149      if (const char* chars = r.front()->chars()) {
    150        n += mallocSizeOf(chars);
    151      }
    152    }
    153 
    154    return n;
    155  }
    156 
    157 private:
    158  bool init();
    159  void free();
    160 
    161 public:
    162  static bool initSingleton();
    163  static void freeSingleton();
    164 
    165  static SharedImmutableStringsCache& getSingleton() {
    166    MOZ_ASSERT(singleton_.inner_);
    167    return singleton_;
    168  }
    169 
    170 private:
    171  SharedImmutableStringsCache() = default;
    172  ~SharedImmutableStringsCache() = default;
    173 
    174 public:
    175  SharedImmutableStringsCache(const SharedImmutableStringsCache& rhs) = delete;
    176  SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) = delete;
    177 
    178  SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) =
    179      delete;
    180 
    181  SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) =
    182      delete;
    183 
    184  /**
    185   * Purge the cache of all refcount == 0 entries.
    186   */
    187  void purge() {
    188    auto locked = inner_->lock();
    189 
    190    for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) {
    191      if (e.front()->refcount == 0) {
    192        // The chars should be eagerly freed when refcount reaches zero.
    193        MOZ_ASSERT(!e.front()->chars());
    194        e.removeFront();
    195      } else {
    196        // The chars should exist as long as the refcount is non-zero.
    197        MOZ_ASSERT(e.front()->chars());
    198      }
    199    }
    200  }
    201 
    202 private:
    203  struct Inner;
    204  class StringBox {
    205    friend class SharedImmutableString;
    206 
    207    OwnedChars chars_;
    208    size_t length_;
    209    const ExclusiveData<Inner>* cache_;
    210 
    211   public:
    212    mutable size_t refcount;
    213 
    214    using Ptr = js::UniquePtr<StringBox>;
    215 
    216    StringBox(OwnedChars&& chars, size_t length,
    217              const ExclusiveData<Inner>* cache)
    218        : chars_(std::move(chars)),
    219          length_(length),
    220          cache_(cache),
    221          refcount(0) {
    222      MOZ_ASSERT(chars_);
    223    }
    224 
    225    static Ptr Create(OwnedChars&& chars, size_t length,
    226                      const ExclusiveData<Inner>* cache) {
    227      return js::MakeUnique<StringBox>(std::move(chars), length, cache);
    228    }
    229 
    230    StringBox(const StringBox&) = delete;
    231    StringBox& operator=(const StringBox&) = delete;
    232 
    233    ~StringBox() {
    234      MOZ_RELEASE_ASSERT(
    235          refcount == 0,
    236          "There are `SharedImmutable[TwoByte]String`s outliving their "
    237          "associated cache! This always leads to use-after-free in the "
    238          "`~SharedImmutableString` destructor!");
    239    }
    240 
    241    const char* chars() const { return chars_.get(); }
    242    size_t length() const { return length_; }
    243  };
    244 
    245  struct Hasher {
    246    /**
    247     * A structure used when querying for a `const char*` string in the cache.
    248     */
    249    class Lookup {
    250      friend struct Hasher;
    251 
    252      HashNumber hash_;
    253      const char* chars_;
    254      size_t length_;
    255 
    256     public:
    257      Lookup(HashNumber hash, const char* chars, size_t length)
    258          : hash_(hash), chars_(chars), length_(length) {
    259        MOZ_ASSERT(chars_);
    260        MOZ_ASSERT(hash == Hasher::hashLongString(chars, length));
    261      }
    262 
    263      Lookup(HashNumber hash, const char16_t* chars, size_t length)
    264          : Lookup(hash, reinterpret_cast<const char*>(chars),
    265                   length * sizeof(char16_t)) {}
    266    };
    267 
    268    static const size_t SHORT_STRING_MAX_LENGTH = 8192;
    269    static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2;
    270 
    271    // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the
    272    // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the
    273    // string. This increases the risk of collisions, but in practice it
    274    // should be rare, and it yields a large speedup for hashing long
    275    // strings.
    276    static HashNumber hashLongString(const char* chars, size_t length) {
    277      MOZ_ASSERT(chars);
    278      return length <= SHORT_STRING_MAX_LENGTH
    279                 ? mozilla::HashString(chars, length)
    280                 : mozilla::AddToHash(
    281                       mozilla::HashString(chars, HASH_CHUNK_LENGTH),
    282                       mozilla::HashString(chars + length - HASH_CHUNK_LENGTH,
    283                                           HASH_CHUNK_LENGTH));
    284    }
    285 
    286    static HashNumber hash(const Lookup& lookup) { return lookup.hash_; }
    287 
    288    static bool match(const StringBox::Ptr& key, const Lookup& lookup) {
    289      MOZ_ASSERT(lookup.chars_);
    290 
    291      if (!key->chars() || key->length() != lookup.length_) {
    292        return false;
    293      }
    294 
    295      if (key->chars() == lookup.chars_) {
    296        return true;
    297      }
    298 
    299      return memcmp(key->chars(), lookup.chars_, key->length()) == 0;
    300    }
    301  };
    302 
    303  // The `Inner` struct contains the actual cached contents and shared between
    304  // the `SharedImmutableStringsCache` singleton and all
    305  // `SharedImmutable[TwoByte]String` holders.
    306  struct Inner {
    307    using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>;
    308 
    309    Set set;
    310 
    311    Inner() = default;
    312 
    313    Inner(const Inner&) = delete;
    314    Inner& operator=(const Inner&) = delete;
    315  };
    316 
    317  const ExclusiveData<Inner>* inner_ = nullptr;
    318 };
    319 
    320 /**
    321 * The `SharedImmutableString` class holds a reference to a `const char*` string
    322 * from the `SharedImmutableStringsCache` and releases the reference upon
    323 * destruction.
    324 */
    325 class SharedImmutableString {
    326  friend class SharedImmutableStringsCache;
    327  friend class SharedImmutableTwoByteString;
    328 
    329  mutable SharedImmutableStringsCache::StringBox* box_;
    330 
    331  explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box);
    332 
    333 public:
    334  /**
    335   * `SharedImmutableString`s are move-able. It is an error to use a
    336   * `SharedImmutableString` after it has been moved.
    337   */
    338  SharedImmutableString(SharedImmutableString&& rhs);
    339  SharedImmutableString& operator=(SharedImmutableString&& rhs);
    340  SharedImmutableString() { box_ = nullptr; }
    341 
    342  /**
    343   * Create another shared reference to the underlying string.
    344   */
    345  SharedImmutableString clone() const;
    346 
    347  // If you want a copy, take one explicitly with `clone`!
    348  SharedImmutableString& operator=(const SharedImmutableString&) = delete;
    349  explicit operator bool() const { return box_ != nullptr; }
    350 
    351  ~SharedImmutableString();
    352 
    353  /**
    354   * Get a raw pointer to the underlying string. It is only safe to use the
    355   * resulting pointer while this `SharedImmutableString` exists.
    356   */
    357  const char* chars() const {
    358    MOZ_ASSERT(box_);
    359    MOZ_ASSERT(box_->refcount > 0);
    360    MOZ_ASSERT(box_->chars());
    361    return box_->chars();
    362  }
    363 
    364  /**
    365   * Get the length of the underlying string.
    366   */
    367  size_t length() const {
    368    MOZ_ASSERT(box_);
    369    MOZ_ASSERT(box_->refcount > 0);
    370    MOZ_ASSERT(box_->chars());
    371    return box_->length();
    372  }
    373 };
    374 
    375 /**
    376 * The `SharedImmutableTwoByteString` class holds a reference to a `const
    377 * char16_t*` string from the `SharedImmutableStringsCache` and releases the
    378 * reference upon destruction.
    379 */
    380 class SharedImmutableTwoByteString {
    381  friend class SharedImmutableStringsCache;
    382 
    383  // If a `char*` string and `char16_t*` string happen to have the same bytes,
    384  // the bytes will be shared but handed out as different types.
    385  SharedImmutableString string_;
    386 
    387  explicit SharedImmutableTwoByteString(SharedImmutableString&& string);
    388  explicit SharedImmutableTwoByteString(
    389      SharedImmutableStringsCache::StringBox* box);
    390 
    391 public:
    392  /**
    393   * `SharedImmutableTwoByteString`s are move-able. It is an error to use a
    394   * `SharedImmutableTwoByteString` after it has been moved.
    395   */
    396  SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs);
    397  SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs);
    398  SharedImmutableTwoByteString() { string_.box_ = nullptr; }
    399 
    400  /**
    401   * Create another shared reference to the underlying string.
    402   */
    403  SharedImmutableTwoByteString clone() const;
    404 
    405  // If you want a copy, take one explicitly with `clone`!
    406  SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) =
    407      delete;
    408  explicit operator bool() const { return string_.box_ != nullptr; }
    409  /**
    410   * Get a raw pointer to the underlying string. It is only safe to use the
    411   * resulting pointer while this `SharedImmutableTwoByteString` exists.
    412   */
    413  const char16_t* chars() const {
    414    return reinterpret_cast<const char16_t*>(string_.chars());
    415  }
    416 
    417  /**
    418   * Get the length of the underlying string.
    419   */
    420  size_t length() const { return string_.length() / sizeof(char16_t); }
    421 };
    422 
    423 }  // namespace js
    424 
    425 #endif  // vm_SharedImmutableStringsCache_h