tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

StringBuilder.h (17856B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef util_StringBuilder_h
      8 #define util_StringBuilder_h
      9 
     10 #include "mozilla/CheckedInt.h"
     11 #include "mozilla/MaybeOneOf.h"
     12 #include "mozilla/Utf8.h"
     13 
     14 #include "frontend/FrontendContext.h"
     15 #include "js/Vector.h"
     16 #include "vm/Runtime.h"
     17 #include "vm/StringType.h"
     18 
     19 namespace js {
     20 
     21 class FrontendContext;
     22 
     23 namespace frontend {
     24 class ParserAtomsTable;
     25 class TaggedParserAtomIndex;
     26 }  // namespace frontend
     27 
     28 namespace detail {
     29 
     30 // GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to
     31 // avoid very expensive memcpys for large strings (eg giant toJSON output for
     32 // sessionstore.js). Drop back to the normal expansion policy once the buffer
     33 // hits 128MB.
     34 static constexpr size_t AggressiveLimit = 128 << 20;
     35 
     36 template <size_t EltSize>
     37 inline size_t GrowEltsAggressively(size_t aOldElts, size_t aIncr) {
     38  mozilla::CheckedInt<size_t> required =
     39      mozilla::CheckedInt<size_t>(aOldElts) + aIncr;
     40  if (!(required * 2).isValid()) {
     41    return 0;
     42  }
     43  required = mozilla::RoundUpPow2(required.value());
     44  required *= 8;
     45  if (!(required * EltSize).isValid() || required.value() > AggressiveLimit) {
     46    // Fall back to doubling behavior if the aggressive growth fails or gets too
     47    // big.
     48    return mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr);
     49  }
     50  return required.value();
     51 };
     52 
     53 }  // namespace detail
     54 
     55 class StringBuilderAllocPolicy {
     56  TempAllocPolicy impl_;
     57  const arena_id_t& arenaId_;
     58 
     59 public:
     60  StringBuilderAllocPolicy(FrontendContext* fc, const arena_id_t& arenaId)
     61      : impl_(fc), arenaId_(arenaId) {}
     62 
     63  StringBuilderAllocPolicy(JSContext* cx, const arena_id_t& arenaId)
     64      : impl_(cx), arenaId_(arenaId) {}
     65 
     66  template <typename T>
     67  T* maybe_pod_malloc(size_t numElems) {
     68    return impl_.maybe_pod_arena_malloc<T>(arenaId_, numElems);
     69  }
     70  template <typename T>
     71  T* maybe_pod_calloc(size_t numElems) {
     72    return impl_.maybe_pod_arena_calloc<T>(arenaId_, numElems);
     73  }
     74  template <typename T>
     75  T* maybe_pod_realloc(T* p, size_t oldSize, size_t newSize) {
     76    return impl_.maybe_pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
     77  }
     78  template <typename T>
     79  T* pod_malloc(size_t numElems) {
     80    return impl_.pod_arena_malloc<T>(arenaId_, numElems);
     81  }
     82  template <typename T>
     83  T* pod_calloc(size_t numElems) {
     84    return impl_.pod_arena_calloc<T>(arenaId_, numElems);
     85  }
     86  template <typename T>
     87  T* pod_realloc(T* p, size_t oldSize, size_t newSize) {
     88    return impl_.pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
     89  }
     90  template <typename T>
     91  void free_(T* p, size_t numElems = 0) {
     92    impl_.free_(p, numElems);
     93  }
     94  void reportAllocOverflow() const { impl_.reportAllocOverflow(); }
     95  bool checkSimulatedOOM() const { return impl_.checkSimulatedOOM(); }
     96 
     97  // See ComputeGrowth in mfbt/Vector.h.
     98  template <size_t EltSize>
     99  static size_t computeGrowth(size_t aOldElts, size_t aIncr) {
    100    return detail::GrowEltsAggressively<EltSize>(aOldElts, aIncr);
    101  }
    102 };
    103 
    104 /*
    105 * String builder that eagerly checks for over-allocation past the maximum
    106 * string length.
    107 *
    108 * Any operation which would exceed the maximum string length causes an
    109 * exception report on the context and results in a failed return value.
    110 *
    111 * Well-sized extractions (which waste no more than 1/4 of their char
    112 * buffer space) are guaranteed for strings built by this interface.
    113 * See |extractWellSized|.
    114 */
    115 class StringBuilder {
    116 protected:
    117  template <typename CharT>
    118  using BufferType =
    119      Vector<CharT, 80 / sizeof(CharT), StringBuilderAllocPolicy>;
    120 
    121  /*
    122   * The Vector's buffer may be either stolen or copied, so we need to use
    123   * TempAllocPolicy and account for the memory manually when stealing.
    124   */
    125  using Latin1CharBuffer = BufferType<Latin1Char>;
    126  using TwoByteCharBuffer = BufferType<char16_t>;
    127 
    128  JSContext* maybeCx_ = nullptr;
    129 
    130  // cb starts out as a Latin1CharBuffer. When a TwoByte char is appended,
    131  // inflateChars() constructs a TwoByteCharBuffer and copies the Latin1 chars.
    132  //
    133  // Note that this buffer can include extra zero bytes for the string buffer
    134  // header. See numHeaderChars_ below.
    135  mozilla::MaybeOneOf<Latin1CharBuffer, TwoByteCharBuffer> cb;
    136 
    137  // Number of reserve()'d chars, see inflateChars. Does not include
    138  // numHeaderChars_.
    139  size_t reservedExclHeader_ = 0;
    140 
    141  // The number of '\0' characters prepended by JSStringBuilder to reserve space
    142  // for the mozilla::StringBuffer header. This is always non-zero for
    143  // JSStringBuilder and zero otherwise.
    144  //
    145  // Note that this is an implementation detail: public methods such as |begin|
    146  // and |length| return the actual string contents/length without these extra
    147  // characters.
    148  uint8_t numHeaderChars_ = 0;
    149 
    150  StringBuilder(const StringBuilder& other) = delete;
    151  void operator=(const StringBuilder& other) = delete;
    152 
    153  // Returns the number of characters to prepend to reserve enough space for the
    154  // mozilla::StringBuffer header.
    155  template <typename CharT>
    156  static constexpr size_t numHeaderChars() {
    157    static_assert(sizeof(mozilla::StringBuffer) % sizeof(CharT) == 0);
    158    return sizeof(mozilla::StringBuffer) / sizeof(CharT);
    159  }
    160 
    161  template <typename CharT>
    162  MOZ_ALWAYS_INLINE bool isCharType() const {
    163    return cb.constructed<BufferType<CharT>>();
    164  }
    165 
    166  MOZ_ALWAYS_INLINE bool isLatin1() const { return isCharType<Latin1Char>(); }
    167 
    168  MOZ_ALWAYS_INLINE bool isTwoByte() const { return isCharType<char16_t>(); }
    169 
    170  template <typename CharT>
    171  MOZ_ALWAYS_INLINE BufferType<CharT>& chars() {
    172    MOZ_ASSERT(isCharType<CharT>());
    173    return cb.ref<BufferType<CharT>>();
    174  }
    175 
    176  template <typename CharT>
    177  MOZ_ALWAYS_INLINE const BufferType<CharT>& chars() const {
    178    MOZ_ASSERT(isCharType<CharT>());
    179    return cb.ref<BufferType<CharT>>();
    180  }
    181 
    182  MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() {
    183    return chars<char16_t>();
    184  }
    185 
    186  MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const {
    187    return chars<char16_t>();
    188  }
    189 
    190  MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() {
    191    return chars<Latin1Char>();
    192  }
    193 
    194  MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
    195    return chars<Latin1Char>();
    196  }
    197 
    198  [[nodiscard]] bool inflateChars();
    199 
    200  template <typename CharT>
    201  JSLinearString* finishStringInternal(JSContext* cx, gc::Heap heap);
    202 
    203 public:
    204  explicit StringBuilder(JSContext* cx,
    205                         const arena_id_t& arenaId = js::MallocArena)
    206      : maybeCx_(cx) {
    207    MOZ_ASSERT(cx);
    208    cb.construct<Latin1CharBuffer>(StringBuilderAllocPolicy{cx, arenaId});
    209  }
    210 
    211  // This constructor should only be used if the methods related to the
    212  // following are not used, because they require a JSContext:
    213  //   * JSString
    214  //   * JSAtom
    215  //   * mozilla::Utf8Unit
    216  explicit StringBuilder(FrontendContext* fc,
    217                         const arena_id_t& arenaId = js::MallocArena) {
    218    MOZ_ASSERT(fc);
    219    cb.construct<Latin1CharBuffer>(StringBuilderAllocPolicy{fc, arenaId});
    220  }
    221 
    222  void clear() { shrinkTo(0); }
    223 
    224  [[nodiscard]] bool reserve(size_t len) {
    225    auto lenWithHeader = mozilla::CheckedInt<size_t>(len) + numHeaderChars_;
    226    if (MOZ_UNLIKELY(!lenWithHeader.isValid())) {
    227      ReportAllocationOverflow(maybeCx_);
    228      return false;
    229    }
    230    if (len > reservedExclHeader_) {
    231      reservedExclHeader_ = len;
    232    }
    233    return isLatin1() ? latin1Chars().reserve(lenWithHeader.value())
    234                      : twoByteChars().reserve(lenWithHeader.value());
    235  }
    236  [[nodiscard]] bool growByUninitialized(size_t incr) {
    237    auto newLenWithHeader =
    238        mozilla::CheckedInt<size_t>(length()) + numHeaderChars_ + incr;
    239    if (MOZ_UNLIKELY(!newLenWithHeader.isValid())) {
    240      ReportAllocationOverflow(maybeCx_);
    241      return false;
    242    }
    243    return isLatin1() ? latin1Chars().growByUninitialized(incr)
    244                      : twoByteChars().growByUninitialized(incr);
    245  }
    246  void shrinkTo(size_t newLength) {
    247    // Note: this can't overflow because the new length must be <= the current
    248    // length.
    249    newLength += numHeaderChars_;
    250    if (isLatin1()) {
    251      latin1Chars().shrinkTo(newLength);
    252    } else {
    253      twoByteChars().shrinkTo(newLength);
    254    }
    255  }
    256  bool empty() const { return length() == 0; }
    257  size_t length() const {
    258    size_t len = isLatin1() ? latin1Chars().length() : twoByteChars().length();
    259    MOZ_ASSERT(len >= numHeaderChars_);
    260    return len - numHeaderChars_;
    261  }
    262  char16_t getChar(size_t idx) const {
    263    idx += numHeaderChars_;
    264    return isLatin1() ? latin1Chars()[idx] : twoByteChars()[idx];
    265  }
    266 
    267  [[nodiscard]] bool ensureTwoByteChars() {
    268    return isTwoByte() || inflateChars();
    269  }
    270 
    271  [[nodiscard]] bool append(const char16_t c) {
    272    if (isLatin1()) {
    273      if (c <= JSString::MAX_LATIN1_CHAR) {
    274        return latin1Chars().append(Latin1Char(c));
    275      }
    276      if (!inflateChars()) {
    277        return false;
    278      }
    279    }
    280    return twoByteChars().append(c);
    281  }
    282  [[nodiscard]] bool append(Latin1Char c) {
    283    return isLatin1() ? latin1Chars().append(c) : twoByteChars().append(c);
    284  }
    285  [[nodiscard]] bool append(char c) { return append(Latin1Char(c)); }
    286 
    287  [[nodiscard]] inline bool append(const char16_t* begin, const char16_t* end);
    288 
    289  [[nodiscard]] bool append(const char16_t* chars, size_t len) {
    290    return append(chars, chars + len);
    291  }
    292 
    293  [[nodiscard]] bool append(const Latin1Char* begin, const Latin1Char* end) {
    294    return isLatin1() ? latin1Chars().append(begin, end)
    295                      : twoByteChars().append(begin, end);
    296  }
    297  [[nodiscard]] bool append(const Latin1Char* chars, size_t len) {
    298    return append(chars, chars + len);
    299  }
    300 
    301  /**
    302   * Interpret the provided count of UTF-8 code units as UTF-8, and append
    303   * the represented code points to this.  If the code units contain invalid
    304   * UTF-8, leave the internal buffer in a consistent but unspecified state,
    305   * report an error, and return false.
    306   */
    307  [[nodiscard]] bool append(const mozilla::Utf8Unit* units, size_t len);
    308 
    309  [[nodiscard]] bool append(const JS::ConstCharPtr chars, size_t len) {
    310    return append(chars.get(), chars.get() + len);
    311  }
    312  [[nodiscard]] bool appendN(Latin1Char c, size_t n) {
    313    return isLatin1() ? latin1Chars().appendN(c, n)
    314                      : twoByteChars().appendN(c, n);
    315  }
    316 
    317  [[nodiscard]] inline bool append(JSString* str);
    318  [[nodiscard]] inline bool append(const JSLinearString* str);
    319  [[nodiscard]] inline bool appendSubstring(JSString* base, size_t off,
    320                                            size_t len);
    321  [[nodiscard]] inline bool appendSubstring(const JSLinearString* base,
    322                                            size_t off, size_t len);
    323  [[nodiscard]] bool append(const frontend::ParserAtomsTable& parserAtoms,
    324                            frontend::TaggedParserAtomIndex atom);
    325 
    326  [[nodiscard]] bool append(const char* chars, size_t len) {
    327    return append(reinterpret_cast<const Latin1Char*>(chars), len);
    328  }
    329 
    330  template <size_t ArrayLength>
    331  [[nodiscard]] bool append(const char (&array)[ArrayLength]) {
    332    return append(array, ArrayLength - 1); /* No trailing '\0'. */
    333  }
    334 
    335  /* Infallible variants usable when the corresponding space is reserved. */
    336  void infallibleAppend(Latin1Char c) {
    337    if (isLatin1()) {
    338      latin1Chars().infallibleAppend(c);
    339    } else {
    340      twoByteChars().infallibleAppend(c);
    341    }
    342  }
    343  void infallibleAppend(char c) { infallibleAppend(Latin1Char(c)); }
    344  void infallibleAppend(const Latin1Char* chars, size_t len) {
    345    if (isLatin1()) {
    346      latin1Chars().infallibleAppend(chars, len);
    347    } else {
    348      twoByteChars().infallibleAppend(chars, len);
    349    }
    350  }
    351  void infallibleAppend(const char* chars, size_t len) {
    352    infallibleAppend(reinterpret_cast<const Latin1Char*>(chars), len);
    353  }
    354 
    355  void infallibleAppendSubstring(const JSLinearString* base, size_t off,
    356                                 size_t len);
    357 
    358  /*
    359   * Because inflation is fallible, these methods should only be used after
    360   * calling ensureTwoByteChars().
    361   */
    362  void infallibleAppend(const char16_t* chars, size_t len) {
    363    twoByteChars().infallibleAppend(chars, len);
    364  }
    365  void infallibleAppend(char16_t c) { twoByteChars().infallibleAppend(c); }
    366 
    367  bool isUnderlyingBufferLatin1() const { return isLatin1(); }
    368 
    369  template <typename CharT>
    370  CharT* begin() {
    371    MOZ_ASSERT(chars<CharT>().length() >= numHeaderChars_);
    372    return chars<CharT>().begin() + numHeaderChars_;
    373  }
    374 
    375  template <typename CharT>
    376  CharT* end() {
    377    return chars<CharT>().end();
    378  }
    379 
    380  template <typename CharT>
    381  const CharT* begin() const {
    382    MOZ_ASSERT(chars<CharT>().length() >= numHeaderChars_);
    383    return chars<CharT>().begin() + numHeaderChars_;
    384  }
    385 
    386  template <typename CharT>
    387  const CharT* end() const {
    388    return chars<CharT>().end();
    389  }
    390 
    391  char16_t* rawTwoByteBegin() { return begin<char16_t>(); }
    392  char16_t* rawTwoByteEnd() { return end<char16_t>(); }
    393  const char16_t* rawTwoByteBegin() const { return begin<char16_t>(); }
    394  const char16_t* rawTwoByteEnd() const { return end<char16_t>(); }
    395 
    396  Latin1Char* rawLatin1Begin() { return begin<Latin1Char>(); }
    397  Latin1Char* rawLatin1End() { return end<Latin1Char>(); }
    398  const Latin1Char* rawLatin1Begin() const { return begin<Latin1Char>(); }
    399  const Latin1Char* rawLatin1End() const { return end<Latin1Char>(); }
    400 
    401  /* Identical to finishString() except that an atom is created. */
    402  JSAtom* finishAtom();
    403  frontend::TaggedParserAtomIndex finishParserAtom(
    404      frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc);
    405 
    406  /*
    407   * Creates a raw string from the characters in this buffer.  The string is
    408   * exactly the characters in this buffer (inflated to TwoByte), it is *not*
    409   * null-terminated unless the last appended character was '\0'.
    410   */
    411  char16_t* stealChars();
    412 };
    413 
    414 // Like StringBuilder, but uses StringBufferArena for the characters.
    415 class JSStringBuilder : public StringBuilder {
    416 public:
    417  explicit JSStringBuilder(JSContext* cx)
    418      : StringBuilder(cx, js::StringBufferArena) {
    419    // Reserve space for the mozilla::StringBuffer header.
    420    numHeaderChars_ = numHeaderChars<Latin1Char>();
    421    MOZ_ALWAYS_TRUE(latin1Chars().appendN('\0', numHeaderChars_));
    422  }
    423 
    424  /*
    425   * Creates a string from the characters in this buffer, then (regardless
    426   * whether string creation succeeded or failed) empties the buffer.
    427   *
    428   * Returns nullptr if string creation failed.
    429   */
    430  JSLinearString* finishString(gc::Heap heap = gc::Heap::Default);
    431 };
    432 
    433 inline bool StringBuilder::append(const char16_t* begin, const char16_t* end) {
    434  MOZ_ASSERT(begin <= end);
    435  if (isLatin1()) {
    436    while (true) {
    437      if (begin >= end) {
    438        return true;
    439      }
    440      if (*begin > JSString::MAX_LATIN1_CHAR) {
    441        break;
    442      }
    443      if (!latin1Chars().append(*begin)) {
    444        return false;
    445      }
    446      ++begin;
    447    }
    448    if (!inflateChars()) {
    449      return false;
    450    }
    451  }
    452  return twoByteChars().append(begin, end);
    453 }
    454 
    455 inline bool StringBuilder::append(const JSLinearString* str) {
    456  JS::AutoCheckCannotGC nogc;
    457  if (isLatin1()) {
    458    if (str->hasLatin1Chars()) {
    459      return latin1Chars().append(str->latin1Chars(nogc), str->length());
    460    }
    461    if (!inflateChars()) {
    462      return false;
    463    }
    464  }
    465  return str->hasLatin1Chars()
    466             ? twoByteChars().append(str->latin1Chars(nogc), str->length())
    467             : twoByteChars().append(str->twoByteChars(nogc), str->length());
    468 }
    469 
    470 inline void StringBuilder::infallibleAppendSubstring(const JSLinearString* base,
    471                                                     size_t off, size_t len) {
    472  MOZ_ASSERT(off + len <= base->length());
    473  MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte());
    474 
    475  JS::AutoCheckCannotGC nogc;
    476  if (base->hasLatin1Chars()) {
    477    infallibleAppend(base->latin1Chars(nogc) + off, len);
    478  } else {
    479    infallibleAppend(base->twoByteChars(nogc) + off, len);
    480  }
    481 }
    482 
    483 inline bool StringBuilder::appendSubstring(const JSLinearString* base,
    484                                           size_t off, size_t len) {
    485  MOZ_ASSERT(off + len <= base->length());
    486 
    487  JS::AutoCheckCannotGC nogc;
    488  if (isLatin1()) {
    489    if (base->hasLatin1Chars()) {
    490      return latin1Chars().append(base->latin1Chars(nogc) + off, len);
    491    }
    492    if (!inflateChars()) {
    493      return false;
    494    }
    495  }
    496  return base->hasLatin1Chars()
    497             ? twoByteChars().append(base->latin1Chars(nogc) + off, len)
    498             : twoByteChars().append(base->twoByteChars(nogc) + off, len);
    499 }
    500 
    501 inline bool StringBuilder::appendSubstring(JSString* base, size_t off,
    502                                           size_t len) {
    503  MOZ_ASSERT(maybeCx_);
    504 
    505  JSLinearString* linear = base->ensureLinear(maybeCx_);
    506  if (!linear) {
    507    return false;
    508  }
    509 
    510  return appendSubstring(linear, off, len);
    511 }
    512 
    513 inline bool StringBuilder::append(JSString* str) {
    514  MOZ_ASSERT(maybeCx_);
    515 
    516  JSLinearString* linear = str->ensureLinear(maybeCx_);
    517  if (!linear) {
    518    return false;
    519  }
    520 
    521  return append(linear);
    522 }
    523 
    524 /* ES5 9.8 ToString, appending the result to the string builder. */
    525 extern bool ValueToStringBuilderSlow(JSContext* cx, const Value& v,
    526                                     StringBuilder& sb);
    527 
    528 inline bool ValueToStringBuilder(JSContext* cx, const Value& v,
    529                                 StringBuilder& sb) {
    530  if (v.isString()) {
    531    return sb.append(v.toString());
    532  }
    533 
    534  return ValueToStringBuilderSlow(cx, v, sb);
    535 }
    536 
    537 /* ES5 9.8 ToString for booleans, appending the result to the string builder. */
    538 inline bool BooleanToStringBuilder(bool b, StringBuilder& sb) {
    539  return b ? sb.append("true") : sb.append("false");
    540 }
    541 
    542 } /* namespace js */
    543 
    544 #endif /* util_StringBuilder_h */