StringBuilder.h (17856B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef util_StringBuilder_h 8 #define util_StringBuilder_h 9 10 #include "mozilla/CheckedInt.h" 11 #include "mozilla/MaybeOneOf.h" 12 #include "mozilla/Utf8.h" 13 14 #include "frontend/FrontendContext.h" 15 #include "js/Vector.h" 16 #include "vm/Runtime.h" 17 #include "vm/StringType.h" 18 19 namespace js { 20 21 class FrontendContext; 22 23 namespace frontend { 24 class ParserAtomsTable; 25 class TaggedParserAtomIndex; 26 } // namespace frontend 27 28 namespace detail { 29 30 // GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to 31 // avoid very expensive memcpys for large strings (eg giant toJSON output for 32 // sessionstore.js). Drop back to the normal expansion policy once the buffer 33 // hits 128MB. 34 static constexpr size_t AggressiveLimit = 128 << 20; 35 36 template <size_t EltSize> 37 inline size_t GrowEltsAggressively(size_t aOldElts, size_t aIncr) { 38 mozilla::CheckedInt<size_t> required = 39 mozilla::CheckedInt<size_t>(aOldElts) + aIncr; 40 if (!(required * 2).isValid()) { 41 return 0; 42 } 43 required = mozilla::RoundUpPow2(required.value()); 44 required *= 8; 45 if (!(required * EltSize).isValid() || required.value() > AggressiveLimit) { 46 // Fall back to doubling behavior if the aggressive growth fails or gets too 47 // big. 48 return mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr); 49 } 50 return required.value(); 51 }; 52 53 } // namespace detail 54 55 class StringBuilderAllocPolicy { 56 TempAllocPolicy impl_; 57 const arena_id_t& arenaId_; 58 59 public: 60 StringBuilderAllocPolicy(FrontendContext* fc, const arena_id_t& arenaId) 61 : impl_(fc), arenaId_(arenaId) {} 62 63 StringBuilderAllocPolicy(JSContext* cx, const arena_id_t& arenaId) 64 : impl_(cx), arenaId_(arenaId) {} 65 66 template <typename T> 67 T* maybe_pod_malloc(size_t numElems) { 68 return impl_.maybe_pod_arena_malloc<T>(arenaId_, numElems); 69 } 70 template <typename T> 71 T* maybe_pod_calloc(size_t numElems) { 72 return impl_.maybe_pod_arena_calloc<T>(arenaId_, numElems); 73 } 74 template <typename T> 75 T* maybe_pod_realloc(T* p, size_t oldSize, size_t newSize) { 76 return impl_.maybe_pod_arena_realloc<T>(arenaId_, p, oldSize, newSize); 77 } 78 template <typename T> 79 T* pod_malloc(size_t numElems) { 80 return impl_.pod_arena_malloc<T>(arenaId_, numElems); 81 } 82 template <typename T> 83 T* pod_calloc(size_t numElems) { 84 return impl_.pod_arena_calloc<T>(arenaId_, numElems); 85 } 86 template <typename T> 87 T* pod_realloc(T* p, size_t oldSize, size_t newSize) { 88 return impl_.pod_arena_realloc<T>(arenaId_, p, oldSize, newSize); 89 } 90 template <typename T> 91 void free_(T* p, size_t numElems = 0) { 92 impl_.free_(p, numElems); 93 } 94 void reportAllocOverflow() const { impl_.reportAllocOverflow(); } 95 bool checkSimulatedOOM() const { return impl_.checkSimulatedOOM(); } 96 97 // See ComputeGrowth in mfbt/Vector.h. 98 template <size_t EltSize> 99 static size_t computeGrowth(size_t aOldElts, size_t aIncr) { 100 return detail::GrowEltsAggressively<EltSize>(aOldElts, aIncr); 101 } 102 }; 103 104 /* 105 * String builder that eagerly checks for over-allocation past the maximum 106 * string length. 107 * 108 * Any operation which would exceed the maximum string length causes an 109 * exception report on the context and results in a failed return value. 110 * 111 * Well-sized extractions (which waste no more than 1/4 of their char 112 * buffer space) are guaranteed for strings built by this interface. 113 * See |extractWellSized|. 114 */ 115 class StringBuilder { 116 protected: 117 template <typename CharT> 118 using BufferType = 119 Vector<CharT, 80 / sizeof(CharT), StringBuilderAllocPolicy>; 120 121 /* 122 * The Vector's buffer may be either stolen or copied, so we need to use 123 * TempAllocPolicy and account for the memory manually when stealing. 124 */ 125 using Latin1CharBuffer = BufferType<Latin1Char>; 126 using TwoByteCharBuffer = BufferType<char16_t>; 127 128 JSContext* maybeCx_ = nullptr; 129 130 // cb starts out as a Latin1CharBuffer. When a TwoByte char is appended, 131 // inflateChars() constructs a TwoByteCharBuffer and copies the Latin1 chars. 132 // 133 // Note that this buffer can include extra zero bytes for the string buffer 134 // header. See numHeaderChars_ below. 135 mozilla::MaybeOneOf<Latin1CharBuffer, TwoByteCharBuffer> cb; 136 137 // Number of reserve()'d chars, see inflateChars. Does not include 138 // numHeaderChars_. 139 size_t reservedExclHeader_ = 0; 140 141 // The number of '\0' characters prepended by JSStringBuilder to reserve space 142 // for the mozilla::StringBuffer header. This is always non-zero for 143 // JSStringBuilder and zero otherwise. 144 // 145 // Note that this is an implementation detail: public methods such as |begin| 146 // and |length| return the actual string contents/length without these extra 147 // characters. 148 uint8_t numHeaderChars_ = 0; 149 150 StringBuilder(const StringBuilder& other) = delete; 151 void operator=(const StringBuilder& other) = delete; 152 153 // Returns the number of characters to prepend to reserve enough space for the 154 // mozilla::StringBuffer header. 155 template <typename CharT> 156 static constexpr size_t numHeaderChars() { 157 static_assert(sizeof(mozilla::StringBuffer) % sizeof(CharT) == 0); 158 return sizeof(mozilla::StringBuffer) / sizeof(CharT); 159 } 160 161 template <typename CharT> 162 MOZ_ALWAYS_INLINE bool isCharType() const { 163 return cb.constructed<BufferType<CharT>>(); 164 } 165 166 MOZ_ALWAYS_INLINE bool isLatin1() const { return isCharType<Latin1Char>(); } 167 168 MOZ_ALWAYS_INLINE bool isTwoByte() const { return isCharType<char16_t>(); } 169 170 template <typename CharT> 171 MOZ_ALWAYS_INLINE BufferType<CharT>& chars() { 172 MOZ_ASSERT(isCharType<CharT>()); 173 return cb.ref<BufferType<CharT>>(); 174 } 175 176 template <typename CharT> 177 MOZ_ALWAYS_INLINE const BufferType<CharT>& chars() const { 178 MOZ_ASSERT(isCharType<CharT>()); 179 return cb.ref<BufferType<CharT>>(); 180 } 181 182 MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() { 183 return chars<char16_t>(); 184 } 185 186 MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const { 187 return chars<char16_t>(); 188 } 189 190 MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { 191 return chars<Latin1Char>(); 192 } 193 194 MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const { 195 return chars<Latin1Char>(); 196 } 197 198 [[nodiscard]] bool inflateChars(); 199 200 template <typename CharT> 201 JSLinearString* finishStringInternal(JSContext* cx, gc::Heap heap); 202 203 public: 204 explicit StringBuilder(JSContext* cx, 205 const arena_id_t& arenaId = js::MallocArena) 206 : maybeCx_(cx) { 207 MOZ_ASSERT(cx); 208 cb.construct<Latin1CharBuffer>(StringBuilderAllocPolicy{cx, arenaId}); 209 } 210 211 // This constructor should only be used if the methods related to the 212 // following are not used, because they require a JSContext: 213 // * JSString 214 // * JSAtom 215 // * mozilla::Utf8Unit 216 explicit StringBuilder(FrontendContext* fc, 217 const arena_id_t& arenaId = js::MallocArena) { 218 MOZ_ASSERT(fc); 219 cb.construct<Latin1CharBuffer>(StringBuilderAllocPolicy{fc, arenaId}); 220 } 221 222 void clear() { shrinkTo(0); } 223 224 [[nodiscard]] bool reserve(size_t len) { 225 auto lenWithHeader = mozilla::CheckedInt<size_t>(len) + numHeaderChars_; 226 if (MOZ_UNLIKELY(!lenWithHeader.isValid())) { 227 ReportAllocationOverflow(maybeCx_); 228 return false; 229 } 230 if (len > reservedExclHeader_) { 231 reservedExclHeader_ = len; 232 } 233 return isLatin1() ? latin1Chars().reserve(lenWithHeader.value()) 234 : twoByteChars().reserve(lenWithHeader.value()); 235 } 236 [[nodiscard]] bool growByUninitialized(size_t incr) { 237 auto newLenWithHeader = 238 mozilla::CheckedInt<size_t>(length()) + numHeaderChars_ + incr; 239 if (MOZ_UNLIKELY(!newLenWithHeader.isValid())) { 240 ReportAllocationOverflow(maybeCx_); 241 return false; 242 } 243 return isLatin1() ? latin1Chars().growByUninitialized(incr) 244 : twoByteChars().growByUninitialized(incr); 245 } 246 void shrinkTo(size_t newLength) { 247 // Note: this can't overflow because the new length must be <= the current 248 // length. 249 newLength += numHeaderChars_; 250 if (isLatin1()) { 251 latin1Chars().shrinkTo(newLength); 252 } else { 253 twoByteChars().shrinkTo(newLength); 254 } 255 } 256 bool empty() const { return length() == 0; } 257 size_t length() const { 258 size_t len = isLatin1() ? latin1Chars().length() : twoByteChars().length(); 259 MOZ_ASSERT(len >= numHeaderChars_); 260 return len - numHeaderChars_; 261 } 262 char16_t getChar(size_t idx) const { 263 idx += numHeaderChars_; 264 return isLatin1() ? latin1Chars()[idx] : twoByteChars()[idx]; 265 } 266 267 [[nodiscard]] bool ensureTwoByteChars() { 268 return isTwoByte() || inflateChars(); 269 } 270 271 [[nodiscard]] bool append(const char16_t c) { 272 if (isLatin1()) { 273 if (c <= JSString::MAX_LATIN1_CHAR) { 274 return latin1Chars().append(Latin1Char(c)); 275 } 276 if (!inflateChars()) { 277 return false; 278 } 279 } 280 return twoByteChars().append(c); 281 } 282 [[nodiscard]] bool append(Latin1Char c) { 283 return isLatin1() ? latin1Chars().append(c) : twoByteChars().append(c); 284 } 285 [[nodiscard]] bool append(char c) { return append(Latin1Char(c)); } 286 287 [[nodiscard]] inline bool append(const char16_t* begin, const char16_t* end); 288 289 [[nodiscard]] bool append(const char16_t* chars, size_t len) { 290 return append(chars, chars + len); 291 } 292 293 [[nodiscard]] bool append(const Latin1Char* begin, const Latin1Char* end) { 294 return isLatin1() ? latin1Chars().append(begin, end) 295 : twoByteChars().append(begin, end); 296 } 297 [[nodiscard]] bool append(const Latin1Char* chars, size_t len) { 298 return append(chars, chars + len); 299 } 300 301 /** 302 * Interpret the provided count of UTF-8 code units as UTF-8, and append 303 * the represented code points to this. If the code units contain invalid 304 * UTF-8, leave the internal buffer in a consistent but unspecified state, 305 * report an error, and return false. 306 */ 307 [[nodiscard]] bool append(const mozilla::Utf8Unit* units, size_t len); 308 309 [[nodiscard]] bool append(const JS::ConstCharPtr chars, size_t len) { 310 return append(chars.get(), chars.get() + len); 311 } 312 [[nodiscard]] bool appendN(Latin1Char c, size_t n) { 313 return isLatin1() ? latin1Chars().appendN(c, n) 314 : twoByteChars().appendN(c, n); 315 } 316 317 [[nodiscard]] inline bool append(JSString* str); 318 [[nodiscard]] inline bool append(const JSLinearString* str); 319 [[nodiscard]] inline bool appendSubstring(JSString* base, size_t off, 320 size_t len); 321 [[nodiscard]] inline bool appendSubstring(const JSLinearString* base, 322 size_t off, size_t len); 323 [[nodiscard]] bool append(const frontend::ParserAtomsTable& parserAtoms, 324 frontend::TaggedParserAtomIndex atom); 325 326 [[nodiscard]] bool append(const char* chars, size_t len) { 327 return append(reinterpret_cast<const Latin1Char*>(chars), len); 328 } 329 330 template <size_t ArrayLength> 331 [[nodiscard]] bool append(const char (&array)[ArrayLength]) { 332 return append(array, ArrayLength - 1); /* No trailing '\0'. */ 333 } 334 335 /* Infallible variants usable when the corresponding space is reserved. */ 336 void infallibleAppend(Latin1Char c) { 337 if (isLatin1()) { 338 latin1Chars().infallibleAppend(c); 339 } else { 340 twoByteChars().infallibleAppend(c); 341 } 342 } 343 void infallibleAppend(char c) { infallibleAppend(Latin1Char(c)); } 344 void infallibleAppend(const Latin1Char* chars, size_t len) { 345 if (isLatin1()) { 346 latin1Chars().infallibleAppend(chars, len); 347 } else { 348 twoByteChars().infallibleAppend(chars, len); 349 } 350 } 351 void infallibleAppend(const char* chars, size_t len) { 352 infallibleAppend(reinterpret_cast<const Latin1Char*>(chars), len); 353 } 354 355 void infallibleAppendSubstring(const JSLinearString* base, size_t off, 356 size_t len); 357 358 /* 359 * Because inflation is fallible, these methods should only be used after 360 * calling ensureTwoByteChars(). 361 */ 362 void infallibleAppend(const char16_t* chars, size_t len) { 363 twoByteChars().infallibleAppend(chars, len); 364 } 365 void infallibleAppend(char16_t c) { twoByteChars().infallibleAppend(c); } 366 367 bool isUnderlyingBufferLatin1() const { return isLatin1(); } 368 369 template <typename CharT> 370 CharT* begin() { 371 MOZ_ASSERT(chars<CharT>().length() >= numHeaderChars_); 372 return chars<CharT>().begin() + numHeaderChars_; 373 } 374 375 template <typename CharT> 376 CharT* end() { 377 return chars<CharT>().end(); 378 } 379 380 template <typename CharT> 381 const CharT* begin() const { 382 MOZ_ASSERT(chars<CharT>().length() >= numHeaderChars_); 383 return chars<CharT>().begin() + numHeaderChars_; 384 } 385 386 template <typename CharT> 387 const CharT* end() const { 388 return chars<CharT>().end(); 389 } 390 391 char16_t* rawTwoByteBegin() { return begin<char16_t>(); } 392 char16_t* rawTwoByteEnd() { return end<char16_t>(); } 393 const char16_t* rawTwoByteBegin() const { return begin<char16_t>(); } 394 const char16_t* rawTwoByteEnd() const { return end<char16_t>(); } 395 396 Latin1Char* rawLatin1Begin() { return begin<Latin1Char>(); } 397 Latin1Char* rawLatin1End() { return end<Latin1Char>(); } 398 const Latin1Char* rawLatin1Begin() const { return begin<Latin1Char>(); } 399 const Latin1Char* rawLatin1End() const { return end<Latin1Char>(); } 400 401 /* Identical to finishString() except that an atom is created. */ 402 JSAtom* finishAtom(); 403 frontend::TaggedParserAtomIndex finishParserAtom( 404 frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc); 405 406 /* 407 * Creates a raw string from the characters in this buffer. The string is 408 * exactly the characters in this buffer (inflated to TwoByte), it is *not* 409 * null-terminated unless the last appended character was '\0'. 410 */ 411 char16_t* stealChars(); 412 }; 413 414 // Like StringBuilder, but uses StringBufferArena for the characters. 415 class JSStringBuilder : public StringBuilder { 416 public: 417 explicit JSStringBuilder(JSContext* cx) 418 : StringBuilder(cx, js::StringBufferArena) { 419 // Reserve space for the mozilla::StringBuffer header. 420 numHeaderChars_ = numHeaderChars<Latin1Char>(); 421 MOZ_ALWAYS_TRUE(latin1Chars().appendN('\0', numHeaderChars_)); 422 } 423 424 /* 425 * Creates a string from the characters in this buffer, then (regardless 426 * whether string creation succeeded or failed) empties the buffer. 427 * 428 * Returns nullptr if string creation failed. 429 */ 430 JSLinearString* finishString(gc::Heap heap = gc::Heap::Default); 431 }; 432 433 inline bool StringBuilder::append(const char16_t* begin, const char16_t* end) { 434 MOZ_ASSERT(begin <= end); 435 if (isLatin1()) { 436 while (true) { 437 if (begin >= end) { 438 return true; 439 } 440 if (*begin > JSString::MAX_LATIN1_CHAR) { 441 break; 442 } 443 if (!latin1Chars().append(*begin)) { 444 return false; 445 } 446 ++begin; 447 } 448 if (!inflateChars()) { 449 return false; 450 } 451 } 452 return twoByteChars().append(begin, end); 453 } 454 455 inline bool StringBuilder::append(const JSLinearString* str) { 456 JS::AutoCheckCannotGC nogc; 457 if (isLatin1()) { 458 if (str->hasLatin1Chars()) { 459 return latin1Chars().append(str->latin1Chars(nogc), str->length()); 460 } 461 if (!inflateChars()) { 462 return false; 463 } 464 } 465 return str->hasLatin1Chars() 466 ? twoByteChars().append(str->latin1Chars(nogc), str->length()) 467 : twoByteChars().append(str->twoByteChars(nogc), str->length()); 468 } 469 470 inline void StringBuilder::infallibleAppendSubstring(const JSLinearString* base, 471 size_t off, size_t len) { 472 MOZ_ASSERT(off + len <= base->length()); 473 MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte()); 474 475 JS::AutoCheckCannotGC nogc; 476 if (base->hasLatin1Chars()) { 477 infallibleAppend(base->latin1Chars(nogc) + off, len); 478 } else { 479 infallibleAppend(base->twoByteChars(nogc) + off, len); 480 } 481 } 482 483 inline bool StringBuilder::appendSubstring(const JSLinearString* base, 484 size_t off, size_t len) { 485 MOZ_ASSERT(off + len <= base->length()); 486 487 JS::AutoCheckCannotGC nogc; 488 if (isLatin1()) { 489 if (base->hasLatin1Chars()) { 490 return latin1Chars().append(base->latin1Chars(nogc) + off, len); 491 } 492 if (!inflateChars()) { 493 return false; 494 } 495 } 496 return base->hasLatin1Chars() 497 ? twoByteChars().append(base->latin1Chars(nogc) + off, len) 498 : twoByteChars().append(base->twoByteChars(nogc) + off, len); 499 } 500 501 inline bool StringBuilder::appendSubstring(JSString* base, size_t off, 502 size_t len) { 503 MOZ_ASSERT(maybeCx_); 504 505 JSLinearString* linear = base->ensureLinear(maybeCx_); 506 if (!linear) { 507 return false; 508 } 509 510 return appendSubstring(linear, off, len); 511 } 512 513 inline bool StringBuilder::append(JSString* str) { 514 MOZ_ASSERT(maybeCx_); 515 516 JSLinearString* linear = str->ensureLinear(maybeCx_); 517 if (!linear) { 518 return false; 519 } 520 521 return append(linear); 522 } 523 524 /* ES5 9.8 ToString, appending the result to the string builder. */ 525 extern bool ValueToStringBuilderSlow(JSContext* cx, const Value& v, 526 StringBuilder& sb); 527 528 inline bool ValueToStringBuilder(JSContext* cx, const Value& v, 529 StringBuilder& sb) { 530 if (v.isString()) { 531 return sb.append(v.toString()); 532 } 533 534 return ValueToStringBuilderSlow(cx, v, sb); 535 } 536 537 /* ES5 9.8 ToString for booleans, appending the result to the string builder. */ 538 inline bool BooleanToStringBuilder(bool b, StringBuilder& sb) { 539 return b ? sb.append("true") : sb.append("false"); 540 } 541 542 } /* namespace js */ 543 544 #endif /* util_StringBuilder_h */