tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

StringBuilder.cpp (8780B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "util/StringBuilder.h"
      8 
      9 #include "mozilla/Latin1.h"
     10 #include "mozilla/Range.h"
     11 
     12 #include <algorithm>
     13 
     14 #include "frontend/ParserAtom.h"  // frontend::{ParserAtomsTable, TaggedParserAtomIndex}
     15 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
     16 #include "vm/BigIntType.h"
     17 #include "vm/StaticStrings.h"
     18 
     19 #include "vm/JSObject-inl.h"
     20 #include "vm/StringType-inl.h"
     21 
     22 using namespace js;
     23 
     24 template <typename CharT, class Buffer>
     25 static CharT* ExtractWellSized(Buffer& cb) {
     26  size_t capacity = cb.capacity();
     27  size_t length = cb.length();
     28  StringBuilderAllocPolicy allocPolicy = cb.allocPolicy();
     29 
     30  CharT* buf = cb.extractOrCopyRawBuffer();
     31  if (!buf) {
     32    return nullptr;
     33  }
     34 
     35  // For medium/big buffers, avoid wasting more than 1/4 of the memory. Very
     36  // small strings will not reach here because they will have been stored in a
     37  // JSInlineString. Don't bother shrinking the allocation unless at least 80
     38  // bytes will be saved, which is a somewhat arbitrary number (though it does
     39  // correspond to a mozjemalloc size class.)
     40  MOZ_ASSERT(capacity >= length);
     41  constexpr size_t minCharsToReclaim = 80 / sizeof(CharT);
     42  if (capacity - length >= minCharsToReclaim &&
     43      capacity - length > capacity / 4) {
     44    CharT* tmp = allocPolicy.pod_realloc<CharT>(buf, capacity, length);
     45    if (!tmp) {
     46      allocPolicy.free_(buf);
     47      return nullptr;
     48    }
     49    buf = tmp;
     50  }
     51 
     52  return buf;
     53 }
     54 
     55 char16_t* StringBuilder::stealChars() {
     56  // stealChars shouldn't be used with JSStringBuilder because JSStringBuilder
     57  // reserves space for the header bytes in the vector.
     58  MOZ_RELEASE_ASSERT(numHeaderChars_ == 0);
     59 
     60  if (isLatin1() && !inflateChars()) {
     61    return nullptr;
     62  }
     63 
     64  return ExtractWellSized<char16_t>(twoByteChars());
     65 }
     66 
     67 bool StringBuilder::inflateChars() {
     68  MOZ_ASSERT(isLatin1());
     69 
     70  TwoByteCharBuffer twoByte(latin1Chars().allocPolicy());
     71 
     72  // Note: each char16_t is two bytes, so we need to change the number of header
     73  // characters.
     74  MOZ_ASSERT(numHeaderChars_ == 0 ||
     75             numHeaderChars_ == numHeaderChars<Latin1Char>());
     76  MOZ_ASSERT(latin1Chars().length() >= numHeaderChars_);
     77  size_t numHeaderCharsNew =
     78      numHeaderChars_ > 0 ? numHeaderChars<char16_t>() : 0;
     79 
     80  /*
     81   * Note: we don't use Vector::capacity() because it always returns a
     82   * value >= sInlineCapacity. Since Latin1CharBuffer::sInlineCapacity >
     83   * TwoByteCharBuffer::sInlineCapacitychars, we'd always malloc here.
     84   */
     85  size_t reserved = reservedExclHeader_ + numHeaderChars_;
     86  size_t capacity = std::max(reserved, latin1Chars().length());
     87  capacity = capacity - numHeaderChars_ + numHeaderCharsNew;
     88  if (!twoByte.reserve(capacity)) {
     89    return false;
     90  }
     91 
     92  twoByte.infallibleAppendN('\0', numHeaderCharsNew);
     93 
     94  auto charsSource = mozilla::AsChars(latin1Chars()).From(numHeaderChars_);
     95  twoByte.infallibleGrowByUninitialized(charsSource.Length());
     96 
     97  auto charsDest = mozilla::Span<char16_t>(twoByte).From(numHeaderCharsNew);
     98  mozilla::ConvertLatin1toUtf16(charsSource, charsDest);
     99 
    100  MOZ_ASSERT(twoByte.length() == numHeaderCharsNew + length());
    101 
    102  cb.destroy();
    103  cb.construct<TwoByteCharBuffer>(std::move(twoByte));
    104  numHeaderChars_ = numHeaderCharsNew;
    105  return true;
    106 }
    107 
    108 bool StringBuilder::append(const frontend::ParserAtomsTable& parserAtoms,
    109                           frontend::TaggedParserAtomIndex atom) {
    110  return parserAtoms.appendTo(*this, atom);
    111 }
    112 
    113 template <typename CharT>
    114 JSLinearString* StringBuilder::finishStringInternal(JSContext* cx,
    115                                                    gc::Heap heap) {
    116  // The Vector must include space for the mozilla::StringBuffer header.
    117  MOZ_ASSERT(numHeaderChars_ == numHeaderChars<CharT>());
    118  MOZ_ASSERT(std::all_of(chars<CharT>().begin(),
    119                         chars<CharT>().begin() + numHeaderChars_,
    120                         [](CharT c) { return c == '\0'; }));
    121 
    122  size_t len = length();
    123 
    124  if (JSAtom* staticStr = cx->staticStrings().lookup(begin<CharT>(), len)) {
    125    return staticStr;
    126  }
    127 
    128  if (JSInlineString::lengthFits<CharT>(len)) {
    129    mozilla::Range<const CharT> range(begin<CharT>(), len);
    130    return NewInlineString<CanGC>(cx, range);
    131  }
    132 
    133  // Use NewStringCopyNDontDeflate if the string is too short for a buffer,
    134  // because:
    135  //
    136  //  (1) If the string is very short and fits in the Vector's inline storage,
    137  //      we can potentially nursery-allocate the characters and avoid a malloc
    138  //      call.
    139  //  (2) ExtractWellSized often performs a realloc because we over-allocate in
    140  //      StringBufferAllocPolicy. After this we'd still have to move/copy the
    141  //      characters in memory to discard the space we reserved for the
    142  //      mozilla::StringBuffer header. Because we have to copy the characters
    143  //      anyway, use NewStringCopyNDontDeflate instead where we can allocate in
    144  //      the nursery.
    145  if (len < JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT)) {
    146    return NewStringCopyNDontDeflate<CanGC>(cx, begin<CharT>(), len, heap);
    147  }
    148 
    149  if (MOZ_UNLIKELY(!mozilla::StringBuffer::IsValidLength<CharT>(len))) {
    150    ReportAllocationOverflow(cx);
    151    return nullptr;
    152  }
    153 
    154  // mozilla::StringBuffer requires a null terminator.
    155  auto& charsWithHeader = chars<CharT>();
    156  if (!charsWithHeader.append('\0')) {
    157    return nullptr;
    158  }
    159 
    160  CharT* mem = ExtractWellSized<CharT>(charsWithHeader);
    161  if (!mem) {
    162    return nullptr;
    163  }
    164  // The Vector is now empty and may be used again, so re-reserve space for
    165  // the header.
    166  MOZ_ASSERT(charsWithHeader.empty());
    167  MOZ_ALWAYS_TRUE(charsWithHeader.appendN('\0', numHeaderChars_));
    168 
    169  // Initialize the StringBuffer header.
    170  RefPtr<mozilla::StringBuffer> buffer =
    171      mozilla::StringBuffer::ConstructInPlace(mem, (len + 1) * sizeof(CharT));
    172  MOZ_ASSERT(buffer->Data() == mem + numHeaderChars_,
    173             "chars are where mozilla::StringBuffer expects them");
    174  MOZ_ASSERT(static_cast<CharT*>(buffer->Data())[len] == '\0',
    175             "StringBuffer must be null-terminated");
    176 
    177  Rooted<JSString::OwnedChars<CharT>> owned(cx, std::move(buffer), len);
    178  return JSLinearString::new_<CanGC, CharT>(cx, &owned, heap);
    179 }
    180 
    181 JSLinearString* JSStringBuilder::finishString(gc::Heap heap) {
    182  MOZ_ASSERT(maybeCx_);
    183 
    184  size_t len = length();
    185  if (len == 0) {
    186    return maybeCx_->names().empty_;
    187  }
    188 
    189  if (MOZ_UNLIKELY(!JSString::validateLength(maybeCx_, len))) {
    190    return nullptr;
    191  }
    192 
    193  static_assert(JSFatInlineString::MAX_LENGTH_TWO_BYTE <
    194                TwoByteCharBuffer::InlineLength);
    195  static_assert(JSFatInlineString::MAX_LENGTH_LATIN1 <
    196                Latin1CharBuffer::InlineLength);
    197 
    198  return isLatin1() ? finishStringInternal<Latin1Char>(maybeCx_, heap)
    199                    : finishStringInternal<char16_t>(maybeCx_, heap);
    200 }
    201 
    202 JSAtom* StringBuilder::finishAtom() {
    203  MOZ_ASSERT(maybeCx_);
    204 
    205  size_t len = length();
    206  if (len == 0) {
    207    return maybeCx_->names().empty_;
    208  }
    209 
    210  JSAtom* atom = isLatin1() ? AtomizeChars(maybeCx_, rawLatin1Begin(), len)
    211                            : AtomizeChars(maybeCx_, rawTwoByteBegin(), len);
    212  clear();
    213  return atom;
    214 }
    215 
    216 frontend::TaggedParserAtomIndex StringBuilder::finishParserAtom(
    217    frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc) {
    218  size_t len = length();
    219  if (len == 0) {
    220    return frontend::TaggedParserAtomIndex::WellKnown::empty();
    221  }
    222 
    223  auto result = isLatin1()
    224                    ? parserAtoms.internLatin1(fc, rawLatin1Begin(), len)
    225                    : parserAtoms.internChar16(fc, rawTwoByteBegin(), len);
    226  clear();
    227  return result;
    228 }
    229 
    230 bool js::ValueToStringBuilderSlow(JSContext* cx, const Value& arg,
    231                                  StringBuilder& sb) {
    232  RootedValue v(cx, arg);
    233  if (!ToPrimitive(cx, JSTYPE_STRING, &v)) {
    234    return false;
    235  }
    236 
    237  if (v.isString()) {
    238    return sb.append(v.toString());
    239  }
    240  if (v.isNumber()) {
    241    return NumberValueToStringBuilder(v, sb);
    242  }
    243  if (v.isBoolean()) {
    244    return BooleanToStringBuilder(v.toBoolean(), sb);
    245  }
    246  if (v.isNull()) {
    247    return sb.append(cx->names().null);
    248  }
    249  if (v.isSymbol()) {
    250    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    251                              JSMSG_SYMBOL_TO_STRING);
    252    return false;
    253  }
    254  if (v.isBigInt()) {
    255    RootedBigInt i(cx, v.toBigInt());
    256    JSLinearString* str = BigInt::toString<CanGC>(cx, i, 10);
    257    if (!str) {
    258      return false;
    259    }
    260    return sb.append(str);
    261  }
    262  MOZ_ASSERT(v.isUndefined());
    263  return sb.append(cx->names().undefined);
    264 }