StringBuilder.cpp (8780B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "util/StringBuilder.h" 8 9 #include "mozilla/Latin1.h" 10 #include "mozilla/Range.h" 11 12 #include <algorithm> 13 14 #include "frontend/ParserAtom.h" // frontend::{ParserAtomsTable, TaggedParserAtomIndex 15 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* 16 #include "vm/BigIntType.h" 17 #include "vm/StaticStrings.h" 18 19 #include "vm/JSObject-inl.h" 20 #include "vm/StringType-inl.h" 21 22 using namespace js; 23 24 template <typename CharT, class Buffer> 25 static CharT* ExtractWellSized(Buffer& cb) { 26 size_t capacity = cb.capacity(); 27 size_t length = cb.length(); 28 StringBuilderAllocPolicy allocPolicy = cb.allocPolicy(); 29 30 CharT* buf = cb.extractOrCopyRawBuffer(); 31 if (!buf) { 32 return nullptr; 33 } 34 35 // For medium/big buffers, avoid wasting more than 1/4 of the memory. Very 36 // small strings will not reach here because they will have been stored in a 37 // JSInlineString. Don't bother shrinking the allocation unless at least 80 38 // bytes will be saved, which is a somewhat arbitrary number (though it does 39 // correspond to a mozjemalloc size class.) 40 MOZ_ASSERT(capacity >= length); 41 constexpr size_t minCharsToReclaim = 80 / sizeof(CharT); 42 if (capacity - length >= minCharsToReclaim && 43 capacity - length > capacity / 4) { 44 CharT* tmp = allocPolicy.pod_realloc<CharT>(buf, capacity, length); 45 if (!tmp) { 46 allocPolicy.free_(buf); 47 return nullptr; 48 } 49 buf = tmp; 50 } 51 52 return buf; 53 } 54 55 char16_t* StringBuilder::stealChars() { 56 // stealChars shouldn't be used with JSStringBuilder because JSStringBuilder 57 // reserves space for the header bytes in the vector. 58 MOZ_RELEASE_ASSERT(numHeaderChars_ == 0); 59 60 if (isLatin1() && !inflateChars()) { 61 return nullptr; 62 } 63 64 return ExtractWellSized<char16_t>(twoByteChars()); 65 } 66 67 bool StringBuilder::inflateChars() { 68 MOZ_ASSERT(isLatin1()); 69 70 TwoByteCharBuffer twoByte(latin1Chars().allocPolicy()); 71 72 // Note: each char16_t is two bytes, so we need to change the number of header 73 // characters. 74 MOZ_ASSERT(numHeaderChars_ == 0 || 75 numHeaderChars_ == numHeaderChars<Latin1Char>()); 76 MOZ_ASSERT(latin1Chars().length() >= numHeaderChars_); 77 size_t numHeaderCharsNew = 78 numHeaderChars_ > 0 ? numHeaderChars<char16_t>() : 0; 79 80 /* 81 * Note: we don't use Vector::capacity() because it always returns a 82 * value >= sInlineCapacity. Since Latin1CharBuffer::sInlineCapacity > 83 * TwoByteCharBuffer::sInlineCapacitychars, we'd always malloc here. 84 */ 85 size_t reserved = reservedExclHeader_ + numHeaderChars_; 86 size_t capacity = std::max(reserved, latin1Chars().length()); 87 capacity = capacity - numHeaderChars_ + numHeaderCharsNew; 88 if (!twoByte.reserve(capacity)) { 89 return false; 90 } 91 92 twoByte.infallibleAppendN('\0', numHeaderCharsNew); 93 94 auto charsSource = mozilla::AsChars(latin1Chars()).From(numHeaderChars_); 95 twoByte.infallibleGrowByUninitialized(charsSource.Length()); 96 97 auto charsDest = mozilla::Span<char16_t>(twoByte).From(numHeaderCharsNew); 98 mozilla::ConvertLatin1toUtf16(charsSource, charsDest); 99 100 MOZ_ASSERT(twoByte.length() == numHeaderCharsNew + length()); 101 102 cb.destroy(); 103 cb.construct<TwoByteCharBuffer>(std::move(twoByte)); 104 numHeaderChars_ = numHeaderCharsNew; 105 return true; 106 } 107 108 bool StringBuilder::append(const frontend::ParserAtomsTable& parserAtoms, 109 frontend::TaggedParserAtomIndex atom) { 110 return parserAtoms.appendTo(*this, atom); 111 } 112 113 template <typename CharT> 114 JSLinearString* StringBuilder::finishStringInternal(JSContext* cx, 115 gc::Heap heap) { 116 // The Vector must include space for the mozilla::StringBuffer header. 117 MOZ_ASSERT(numHeaderChars_ == numHeaderChars<CharT>()); 118 MOZ_ASSERT(std::all_of(chars<CharT>().begin(), 119 chars<CharT>().begin() + numHeaderChars_, 120 [](CharT c) { return c == '\0'; })); 121 122 size_t len = length(); 123 124 if (JSAtom* staticStr = cx->staticStrings().lookup(begin<CharT>(), len)) { 125 return staticStr; 126 } 127 128 if (JSInlineString::lengthFits<CharT>(len)) { 129 mozilla::Range<const CharT> range(begin<CharT>(), len); 130 return NewInlineString<CanGC>(cx, range); 131 } 132 133 // Use NewStringCopyNDontDeflate if the string is too short for a buffer, 134 // because: 135 // 136 // (1) If the string is very short and fits in the Vector's inline storage, 137 // we can potentially nursery-allocate the characters and avoid a malloc 138 // call. 139 // (2) ExtractWellSized often performs a realloc because we over-allocate in 140 // StringBufferAllocPolicy. After this we'd still have to move/copy the 141 // characters in memory to discard the space we reserved for the 142 // mozilla::StringBuffer header. Because we have to copy the characters 143 // anyway, use NewStringCopyNDontDeflate instead where we can allocate in 144 // the nursery. 145 if (len < JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT)) { 146 return NewStringCopyNDontDeflate<CanGC>(cx, begin<CharT>(), len, heap); 147 } 148 149 if (MOZ_UNLIKELY(!mozilla::StringBuffer::IsValidLength<CharT>(len))) { 150 ReportAllocationOverflow(cx); 151 return nullptr; 152 } 153 154 // mozilla::StringBuffer requires a null terminator. 155 auto& charsWithHeader = chars<CharT>(); 156 if (!charsWithHeader.append('\0')) { 157 return nullptr; 158 } 159 160 CharT* mem = ExtractWellSized<CharT>(charsWithHeader); 161 if (!mem) { 162 return nullptr; 163 } 164 // The Vector is now empty and may be used again, so re-reserve space for 165 // the header. 166 MOZ_ASSERT(charsWithHeader.empty()); 167 MOZ_ALWAYS_TRUE(charsWithHeader.appendN('\0', numHeaderChars_)); 168 169 // Initialize the StringBuffer header. 170 RefPtr<mozilla::StringBuffer> buffer = 171 mozilla::StringBuffer::ConstructInPlace(mem, (len + 1) * sizeof(CharT)); 172 MOZ_ASSERT(buffer->Data() == mem + numHeaderChars_, 173 "chars are where mozilla::StringBuffer expects them"); 174 MOZ_ASSERT(static_cast<CharT*>(buffer->Data())[len] == '\0', 175 "StringBuffer must be null-terminated"); 176 177 Rooted<JSString::OwnedChars<CharT>> owned(cx, std::move(buffer), len); 178 return JSLinearString::new_<CanGC, CharT>(cx, &owned, heap); 179 } 180 181 JSLinearString* JSStringBuilder::finishString(gc::Heap heap) { 182 MOZ_ASSERT(maybeCx_); 183 184 size_t len = length(); 185 if (len == 0) { 186 return maybeCx_->names().empty_; 187 } 188 189 if (MOZ_UNLIKELY(!JSString::validateLength(maybeCx_, len))) { 190 return nullptr; 191 } 192 193 static_assert(JSFatInlineString::MAX_LENGTH_TWO_BYTE < 194 TwoByteCharBuffer::InlineLength); 195 static_assert(JSFatInlineString::MAX_LENGTH_LATIN1 < 196 Latin1CharBuffer::InlineLength); 197 198 return isLatin1() ? finishStringInternal<Latin1Char>(maybeCx_, heap) 199 : finishStringInternal<char16_t>(maybeCx_, heap); 200 } 201 202 JSAtom* StringBuilder::finishAtom() { 203 MOZ_ASSERT(maybeCx_); 204 205 size_t len = length(); 206 if (len == 0) { 207 return maybeCx_->names().empty_; 208 } 209 210 JSAtom* atom = isLatin1() ? AtomizeChars(maybeCx_, rawLatin1Begin(), len) 211 : AtomizeChars(maybeCx_, rawTwoByteBegin(), len); 212 clear(); 213 return atom; 214 } 215 216 frontend::TaggedParserAtomIndex StringBuilder::finishParserAtom( 217 frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc) { 218 size_t len = length(); 219 if (len == 0) { 220 return frontend::TaggedParserAtomIndex::WellKnown::empty(); 221 } 222 223 auto result = isLatin1() 224 ? parserAtoms.internLatin1(fc, rawLatin1Begin(), len) 225 : parserAtoms.internChar16(fc, rawTwoByteBegin(), len); 226 clear(); 227 return result; 228 } 229 230 bool js::ValueToStringBuilderSlow(JSContext* cx, const Value& arg, 231 StringBuilder& sb) { 232 RootedValue v(cx, arg); 233 if (!ToPrimitive(cx, JSTYPE_STRING, &v)) { 234 return false; 235 } 236 237 if (v.isString()) { 238 return sb.append(v.toString()); 239 } 240 if (v.isNumber()) { 241 return NumberValueToStringBuilder(v, sb); 242 } 243 if (v.isBoolean()) { 244 return BooleanToStringBuilder(v.toBoolean(), sb); 245 } 246 if (v.isNull()) { 247 return sb.append(cx->names().null); 248 } 249 if (v.isSymbol()) { 250 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 251 JSMSG_SYMBOL_TO_STRING); 252 return false; 253 } 254 if (v.isBigInt()) { 255 RootedBigInt i(cx, v.toBigInt()); 256 JSLinearString* str = BigInt::toString<CanGC>(cx, i, 10); 257 if (!str) { 258 return false; 259 } 260 return sb.append(str); 261 } 262 MOZ_ASSERT(v.isUndefined()); 263 return sb.append(cx->names().undefined); 264 }