RegExpShim.h (45307B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // Copyright 2019 the V8 project authors. All rights reserved. 8 // Use of this source code is governed by a BSD-style license that can be 9 // found in the LICENSE file. 10 11 #ifndef RegexpShim_h 12 #define RegexpShim_h 13 14 #include "mozilla/Assertions.h" 15 #include "mozilla/Attributes.h" 16 #include "mozilla/MathAlgorithms.h" 17 #include "mozilla/Maybe.h" 18 #include "mozilla/SegmentedVector.h" 19 #include "mozilla/Sprintf.h" 20 21 #include <algorithm> 22 #include <cctype> 23 #include <optional> 24 25 #include "irregexp/RegExpTypes.h" 26 #include "irregexp/util/FlagsShim.h" 27 #include "irregexp/util/VectorShim.h" 28 #include "irregexp/util/ZoneShim.h" 29 #include "jit/JitCode.h" 30 #include "jit/Label.h" 31 #include "jit/shared/Assembler-shared.h" 32 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit 33 #include "js/RegExpFlags.h" 34 #include "js/Value.h" 35 #include "threading/ExclusiveData.h" 36 #include "util/DifferentialTesting.h" 37 #include "vm/JSContext.h" 38 #include "vm/MutexIDs.h" 39 #include "vm/NativeObject.h" 40 #include "vm/RegExpShared.h" 41 42 // Forward declaration of classes 43 namespace v8 { 44 namespace internal { 45 46 class Heap; 47 class Isolate; 48 class RegExpMatchInfo; 49 class RegExpStack; 50 51 template <typename T> 52 class Handle; 53 54 } // namespace internal 55 } // namespace v8 56 57 #define V8_WARN_UNUSED_RESULT [[nodiscard]] 58 #define V8_EXPORT_PRIVATE 59 #define V8_FALLTHROUGH [[fallthrough]] 60 #define V8_NODISCARD [[nodiscard]] 61 #define V8_NOEXCEPT noexcept 62 63 #define FATAL(x) MOZ_CRASH(x) 64 #define UNREACHABLE() MOZ_CRASH("unreachable code") 65 #define UNIMPLEMENTED() 
MOZ_CRASH("unimplemented code") 66 #define STATIC_ASSERT(exp) static_assert(exp, #exp) 67 68 #define DCHECK MOZ_ASSERT 69 #define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs)) 70 #define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs)) 71 #define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs)) 72 #define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs)) 73 #define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs)) 74 #define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs)) 75 #define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr) 76 #define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr) 77 #define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs) 78 #define CHECK MOZ_RELEASE_ASSERT 79 #define CHECK_EQ(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) == (rhs)) 80 #define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs)) 81 #define CHECK_GE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) >= (rhs)) 82 #define CHECK_IMPLIES(lhs, rhs) MOZ_RELEASE_ASSERT(!(lhs) || (rhs)) 83 #define CONSTEXPR_DCHECK MOZ_ASSERT 84 85 // These assertions are necessary to preserve the soundness of the V8 86 // sandbox. In V8, they are debug-only if the sandbox is off, but release 87 // asserts if the sandbox is turned on. We don't have an equivalent sandbox, 88 // so they can be debug checks for now. 89 #define SBXCHECK MOZ_ASSERT 90 #define SBXCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs)) 91 #define SBXCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs)) 92 #define SBXCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs)) 93 #define SBXCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs)) 94 #define SBXCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs)) 95 #define SBXCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs)) 96 97 #define MemCopy memcpy 98 99 // Origin: 100 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319 101 // ptrdiff_t is 't' according to the standard, but MSVC uses 'I'. 
// printf length modifiers for ptrdiff_t, selected per compiler.
#ifdef _MSC_VER
#  define V8PRIxPTRDIFF "Ix"
#  define V8PRIdPTRDIFF "Id"
#  define V8PRIuPTRDIFF "Iu"
#else
#  define V8PRIxPTRDIFF "tx"
#  define V8PRIdPTRDIFF "td"
#  define V8PRIuPTRDIFF "tu"
#endif

// V8's arraysize is spelled std::size here.
#define arraysize std::size

// Explicitly declare the assignment operator as deleted.
#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete

// Explicitly declare the copy constructor and assignment operator as deleted.
// This also deletes the implicit move constructor and implicit move assignment
// operator, but still allows to manually define them.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  DISALLOW_ASSIGN(TypeName)

// Explicitly declare all implicit constructors as deleted, namely the
// default constructor, copy constructor and operator= functions.
// This is especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName() = delete;                           \
  DISALLOW_COPY_AND_ASSIGN(TypeName)

namespace v8 {

// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
// Returns true when none of the bits selected by (alignment - 1) are set in
// value. The mask only describes an alignment when |alignment| is a power of
// two.
template <typename T, typename U>
constexpr inline bool IsAligned(T value, U alignment) {
  return (value & (alignment - 1)) == 0;
}

// Addresses are carried around as plain pointer-sized integers.
using Address = uintptr_t;
static const Address kNullAddress = 0;

// Returns the frame address of the calling function (as reported by the
// compiler builtin) converted to an integer.
inline uintptr_t GetCurrentStackPosition() {
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
}

namespace base {

// Latin1/UTF-16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// Code units in UTF-16 are 16 bits wide.
using uc16 = char16_t;
using uc32 = uint32_t;

// Size in bytes of one UTF-16 code unit.
constexpr int kUC16Size = sizeof(uc16);

// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
// The USE(x, ...) template is used to silence C++ compiler warnings
// issued for (yet) unused variables (typically parameters).
// The arguments are guaranteed to be evaluated from left to right.
struct Use {
  // Accepts (and ignores) a value of any type.
  template <typename T>
  Use(T&&) {}  // NOLINT(runtime/explicit)
};
#define USE(...)                                                   \
  do {                                                             \
    ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
    (void)unused_tmp_array_for_use_macro;                          \
  } while (false)

// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
// saturated_cast<> is analogous to static_cast<> for numeric types, except
// that the specified numeric conversion will saturate rather than overflow or
// underflow.
template <typename Dst, typename Src>
inline Dst saturated_cast(Src value);

// Only the two conversions the regexp code needs are provided, as explicit
// specializations, rather than importing V8's generic template machinery.
// Both pin the input to the uint8_t range [0, 255] before narrowing.
template <>
inline uint8_t saturated_cast<uint8_t, int>(int x) {
  // Negative inputs saturate to 0, inputs above 255 saturate to 255.
  return static_cast<uint8_t>(std::clamp(x, 0, 255));
}
template <>
inline uint8_t saturated_cast<uint8_t, uint32_t>(uint32_t x) {
  // An unsigned input can only overflow upwards.
  return static_cast<uint8_t>(std::min(x, uint32_t{255}));
}

// Origin:
// https://github.com/v8/v8/blob/fc088cdaccadede84886eee881e67af9db53669a/src/base/bounds.h#L14-L28
// Checks if value is in range [lower_limit, higher_limit] using a single
// branch.
template <typename T, typename U>
inline constexpr bool IsInRange(T value, U lower_limit, U higher_limit) {
  using unsigned_T = typename std::make_unsigned<T>::type;
  // Use static_cast to support enum classes.
  // Both differences are taken in unsigned_T, so a value below lower_limit
  // wraps around to a large number and fails the single <= comparison.
  return static_cast<unsigned_T>(static_cast<unsigned_T>(value) -
                                 static_cast<unsigned_T>(lower_limit)) <=
         static_cast<unsigned_T>(static_cast<unsigned_T>(higher_limit) -
                                 static_cast<unsigned_T>(lower_limit));
}

// Expands to an empty brace initializer.
#define LAZY_INSTANCE_INITIALIZER \
  {                               \
  }

// Holds a Maybe<T> inside a js::ExclusiveData and constructs the T on the
// first call to Pointer().
template <typename T>
class LazyInstanceImpl {
 public:
  LazyInstanceImpl() : value_(js::mutexid::IrregexpLazyStatic) {}

  // Takes the lock, default-constructs the value if it is not there yet, and
  // returns a pointer to it.
  const T* Pointer() {
    auto val = value_.lock();
    if (val->isNothing()) {
      val->emplace();
    }
    return val->ptr();
  }

 private:
  js::ExclusiveData<mozilla::Maybe<T>> value_;
};

// Provides the V8 spelling LazyInstance<T>::type for LazyInstanceImpl<T>.
template <typename T>
class LazyInstance {
 public:
  using type = LazyInstanceImpl<T>;
};

// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
// Used in regexp-parser.cc
inline int HexValue(base::uc32 c) {
  c -= '0';
  if (static_cast<unsigned>(c) <= 9) return c;
  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
  if (static_cast<unsigned>(c) <= 5) return c + 10;
  return -1;
}

// Forwards to mozilla::AddToHash, folding every argument into aHash.
template <typename... Args>
[[nodiscard]] uint32_t hash_combine(uint32_t aHash, Args... aArgs) {
  return mozilla::AddToHash(aHash, aArgs...);
}

namespace bits {

// Forwards to mozilla::CountTrailingZeroes64.
inline uint64_t CountTrailingZeros(uint64_t value) {
  return mozilla::CountTrailingZeroes64(value);
}

// Forwards to mozilla::RoundUpPow2.
inline size_t RoundUpToPowerOfTwo32(size_t value) {
  return mozilla::RoundUpPow2(value);
}

// True for positive values with exactly one bit set.
template <typename T>
constexpr bool IsPowerOfTwo(T value) {
  return value > 0 && (value & (value - 1)) == 0;
}

}  // namespace bits
}  // namespace base

namespace unibrow {

using uchar = unsigned int;

// Origin:
// https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
class Latin1 {
 public:
  static const base::uc16 kMaxChar = 0xff;

  // Convert the character to Latin-1 case equivalent if possible.
  // Characters without a special case here are returned unchanged.
  static inline base::uc16 TryConvertToLatin1(base::uc16 c) {
    // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
    // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
    if (c == 0x039C || c == 0x03BC) {
      return 0xB5;
    }
    // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
    // Y WITH DIAERESIS".
    if (c == 0x0178) {
      return 0xFF;
    }
    return c;
  }
};

// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
// Every member forwards to the corresponding js::unicode helper.
class Utf16 {
 public:
  static inline bool IsLeadSurrogate(int code) {
    return js::unicode::IsLeadSurrogate(code);
  }
  static inline bool IsTrailSurrogate(int code) {
    return js::unicode::IsTrailSurrogate(code);
  }
  static inline base::uc16 LeadSurrogate(uint32_t char_code) {
    return js::unicode::LeadSurrogate(char_code);
  }
  static inline base::uc16 TrailSurrogate(uint32_t char_code) {
    return js::unicode::TrailSurrogate(char_code);
  }
  static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
    return js::unicode::UTF16Decode(lead, trail);
  }
  static const uchar kMaxNonSurrogateCharCode = 0xffff;
};

#ifndef V8_INTL_SUPPORT

// A cache used in case conversion. It caches the value for characters
// that either have no mapping or map to a single character independent
// of context. Characters that map to more than one character or that
// map differently depending on context are always looked up.
324 // Origin: 325 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88 326 template <class T, int size = 256> 327 class Mapping { 328 public: 329 inline Mapping() = default; 330 inline int get(uchar c, uchar n, uchar* result) { 331 CacheEntry entry = entries_[c & kMask]; 332 if (entry.code_point_ == c) { 333 if (entry.offset_ == 0) { 334 return 0; 335 } else { 336 result[0] = c + entry.offset_; 337 return 1; 338 } 339 } else { 340 return CalculateValue(c, n, result); 341 } 342 } 343 344 private: 345 int CalculateValue(uchar c, uchar n, uchar* result) { 346 bool allow_caching = true; 347 int length = T::Convert(c, n, result, &allow_caching); 348 if (allow_caching) { 349 if (length == 1) { 350 entries_[c & kMask] = CacheEntry(c, result[0] - c); 351 return 1; 352 } else { 353 entries_[c & kMask] = CacheEntry(c, 0); 354 return 0; 355 } 356 } else { 357 return length; 358 } 359 } 360 361 struct CacheEntry { 362 inline CacheEntry() : code_point_(kNoChar), offset_(0) {} 363 inline CacheEntry(uchar code_point, signed offset) 364 : code_point_(code_point), offset_(offset) {} 365 uchar code_point_; 366 signed offset_; 367 static const int kNoChar = (1 << 21) - 1; 368 }; 369 static const int kSize = size; 370 static const int kMask = kSize - 1; 371 CacheEntry entries_[kSize]; 372 }; 373 374 // Origin: 375 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252 376 struct Ecma262Canonicalize { 377 static const int kMaxWidth = 1; 378 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); 379 }; 380 struct Ecma262UnCanonicalize { 381 static const int kMaxWidth = 4; 382 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); 383 }; 384 struct CanonicalizationRange { 385 static const int kMaxWidth = 1; 386 static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); 387 }; 388 389 #endif // !V8_INTL_SUPPORT 390 391 
// Declares the Letter predicate; the definition is not in this header.
struct Letter {
  static bool Is(uchar c);
};

}  // namespace unibrow

namespace internal {

#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
// printf-style tracing helpers; the second overload takes the output stream.
void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);

// Superclass for classes only using static method functions.
// The subclass of AllStatic cannot be instantiated at all.
// (The deleted constructor is only declared when DEBUG is defined.)
class AllStatic {
#ifdef DEBUG
 public:
  AllStatic() = delete;
#endif
};

// Superclass for classes managed with new and delete.
// In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
// Compare:
// https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
class Malloced {
 public:
  // Allocates with js_malloc. A failed allocation crashes through the
  // OOM-unsafe region instead of returning null.
  static void* operator new(size_t size) {
    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    void* result = js_malloc(size);
    if (!result) {
      oomUnsafe.crash("Irregexp Malloced shim");
    }
    return result;
  }
  static void operator delete(void* p) { js_free(p); }
};

constexpr int32_t KB = 1024;
constexpr int32_t MB = 1024 * 1024;

// V8's int limits are mapped onto the JSVAL int32 limits.
#define kMaxInt JSVAL_INT_MAX
#define kMinInt JSVAL_INT_MIN
constexpr int kSystemPointerSize = sizeof(void*);

// The largest integer n such that n and n + 1 are both exactly
// representable as a Number value. ES6 section 20.1.2.6
constexpr uint64_t kMaxSafeIntegerUint64 = (uint64_t{1} << 53) - 1;
constexpr double kMaxSafeInteger = static_cast<double>(kMaxSafeIntegerUint64);

constexpr int kBitsPerByte = 8;
constexpr int kBitsPerByteLog2 = 3;
constexpr int kUInt16Size = sizeof(uint16_t);
constexpr int kInt32Size = sizeof(int32_t);
constexpr int kUInt32Size = sizeof(uint32_t);
constexpr int kInt64Size = sizeof(int64_t);

constexpr int kMaxUInt16 = (1 << 16) - 1;

// True for the code points '0' through '9'.
inline constexpr bool IsDecimalDigit(base::uc32 c) {
  return c >= '0' && c <= '9';
}

// Sets bit 0x20 of c; no check is made that c is a letter.
inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }

// True when val >> 24 is zero.
inline bool is_uint24(int64_t val) { return (val >> 24) == 0; }
// True when val lies in [-2^23, 2^23).
inline bool is_int24(int64_t val) {
  int64_t limit = int64_t(1) << 23;
  return (-limit <= val) && (val < limit);
}

// Identifier classification is delegated to js::unicode.
inline bool IsIdentifierStart(base::uc32 c) {
  return js::unicode::IsIdentifierStart(char32_t(c));
}
inline bool IsIdentifierPart(base::uc32 c) {
  return js::unicode::IsIdentifierPart(char32_t(c));
}

// Wrappers to disambiguate char16_t and uc16.
struct AsUC16 {
  explicit AsUC16(char16_t v) : value(v) {}
  char16_t value;
};

struct AsUC32 {
  explicit AsUC32(int32_t v) : value(v) {}
  int32_t value;
};

// Stream insertion for the wrappers above (defined outside this header).
std::ostream& operator<<(std::ostream& os, const AsUC16& c);
std::ostream& operator<<(std::ostream& os, const AsUC32& c);

// This class is used for the output of trace-regexp-parser. V8 has
// an elaborate implementation to ensure that the output gets to the
// right place, even on Android. We just need something that will
// print output (ideally to stderr, to match the rest of our tracing
// code). This is an empty wrapper that will convert itself to
// std::cerr when used.
489 class StdoutStream { 490 public: 491 operator std::ostream&() const; 492 template <typename T> 493 std::ostream& operator<<(T t); 494 }; 495 496 // Reuse existing Maybe implementation 497 using mozilla::Maybe; 498 499 template <typename T> 500 Maybe<T> Just(const T& value) { 501 return mozilla::Some(value); 502 } 503 504 template <typename T> 505 mozilla::Nothing Nothing() { 506 return mozilla::Nothing(); 507 } 508 509 template <typename T> 510 using PseudoHandle = mozilla::UniquePtr<T, JS::FreePolicy>; 511 512 // Compare 8bit/16bit chars to 8bit/16bit chars. 513 // Used indirectly by regexp-interpreter.cc 514 // Taken from: https://github.com/v8/v8/blob/master/src/utils/utils.h 515 template <typename lchar, typename rchar> 516 inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs, 517 size_t chars) { 518 const lchar* limit = lhs + chars; 519 if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) { 520 // memcmp compares byte-by-byte, yielding wrong results for two-byte 521 // strings on little-endian systems. 
522 return memcmp(lhs, rhs, chars); 523 } 524 while (lhs < limit) { 525 int r = static_cast<int>(*lhs) - static_cast<int>(*rhs); 526 if (r != 0) return r; 527 ++lhs; 528 ++rhs; 529 } 530 return 0; 531 } 532 template <typename lchar, typename rchar> 533 inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) { 534 DCHECK_LE(sizeof(lchar), 2); 535 DCHECK_LE(sizeof(rchar), 2); 536 if (sizeof(lchar) == 1) { 537 if (sizeof(rchar) == 1) { 538 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs), 539 reinterpret_cast<const uint8_t*>(rhs), chars); 540 } else { 541 return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs), 542 reinterpret_cast<const char16_t*>(rhs), 543 chars); 544 } 545 } else { 546 if (sizeof(rchar) == 1) { 547 return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs), 548 reinterpret_cast<const uint8_t*>(rhs), chars); 549 } else { 550 return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs), 551 reinterpret_cast<const char16_t*>(rhs), 552 chars); 553 } 554 } 555 } 556 557 // Compare 8bit/16bit chars to 8bit/16bit chars. 558 template <typename lchar, typename rchar> 559 inline bool CompareCharsEqualUnsigned(const lchar* lhs, const rchar* rhs, 560 size_t chars) { 561 STATIC_ASSERT(std::is_unsigned<lchar>::value); 562 STATIC_ASSERT(std::is_unsigned<rchar>::value); 563 if (sizeof(*lhs) == sizeof(*rhs)) { 564 // memcmp compares byte-by-byte, but for equality it doesn't matter whether 565 // two-byte char comparison is little- or big-endian. 
566 return memcmp(lhs, rhs, chars * sizeof(*lhs)) == 0; 567 } 568 for (const lchar* limit = lhs + chars; lhs < limit; ++lhs, ++rhs) { 569 if (*lhs != *rhs) return false; 570 } 571 return true; 572 } 573 574 template <typename lchar, typename rchar> 575 inline bool CompareCharsEqual(const lchar* lhs, const rchar* rhs, 576 size_t chars) { 577 using ulchar = typename std::make_unsigned<lchar>::type; 578 using urchar = typename std::make_unsigned<rchar>::type; 579 return CompareCharsEqualUnsigned(reinterpret_cast<const ulchar*>(lhs), 580 reinterpret_cast<const urchar*>(rhs), chars); 581 } 582 583 // V8::Object ~= JS::Value 584 class Object { 585 public: 586 // The default object constructor in V8 stores a nullptr, 587 // which has its low bit clear and is interpreted as Smi(0). 588 constexpr Object() : asBits_(JS::Int32Value(0).asRawBits()) {} 589 590 Object(const JS::Value& value) : asBits_(value.asRawBits()) {} 591 592 // This constructor is only used in an unused implementation of 593 // IsCharacterInRangeArray in regexp-macro-assembler.cc. 594 Object(uintptr_t raw) : asBits_(raw) { MOZ_CRASH("unused"); } 595 596 JS::Value value() const { return JS::Value::fromRawBits(asBits_); } 597 598 inline static Object cast(Object object) { return object; } 599 600 protected: 601 void setValue(const JS::Value& val) { asBits_ = val.asRawBits(); } 602 uint64_t asBits_; 603 } JS_HAZ_GC_POINTER; 604 605 // Used in regexp-interpreter.cc to check the return value of 606 // isolate->stack_guard()->HandleInterrupts(). We want to handle 607 // interrupts in the caller, so we return a magic value from 608 // HandleInterrupts and check for it here. 
inline bool IsExceptionHole(Object obj, Isolate*) {
  return obj.value().isMagic(JS_INTERRUPT_REGEXP);
}

// An Object holding an int32 JS::Value.
class Smi : public Object {
 public:
  // Wraps |value| as an Int32Value.
  static Smi FromInt(int32_t value) {
    Smi smi;
    smi.setValue(JS::Int32Value(value));
    return smi;
  }
  // Reads the wrapped value back with toInt32().
  static inline int32_t ToInt(const Object object) {
    return object.value().toInt32();
  }
};

// V8::HeapObject ~= GC thing
class HeapObject : public Object {
 public:
  // Re-wraps the value of |object| as a HeapObject; no type check is made.
  inline static HeapObject cast(Object object) {
    HeapObject h;
    h.setValue(object.value());
    return h;
  }
};

// V8's values use low-bit tagging. If the LSB is 0, it's a small
// integer. If the LSB is 1, it's a pointer to some GC thing. In V8,
// this wrapper class is used to represent a pointer that has the low
// bit set, or a small integer that has been shifted left by one
// bit. We don't use the same tagging system, so all we need is a
// transparent wrapper that automatically converts to/from the wrapped
// type.
template <typename T>
class Tagged {
 public:
  Tagged() {}
  MOZ_IMPLICIT Tagged(const T& value) : value_(value) {}
  MOZ_IMPLICIT Tagged(T&& value) : value_(std::move(value)) {}

  // Member access goes straight to the wrapped value.
  T* operator->() { return &value_; }
  // Implicit conversion back to the wrapped type (returns a copy).
  constexpr operator T() const { return value_; }

 private:
  T value_;
};

// Adapted from v8/src/objects/casting.h

// Converts between Tagged wrappers by calling To::cast on the wrapped value.
template <typename To, typename From>
inline Tagged<To> UncheckedCast(Tagged<From> value) {
  return Tagged<To>(To::cast(value));
}

// Wraps |value| in a Tagged and forwards to UncheckedCast.
template <typename To, typename From>
inline Tagged<To> Cast(const From& value) {
  return UncheckedCast<To>(Tagged(value));
}

// A fixed-size array with Objects (aka Values) as element types.
// Implemented using the dense elements of an ArrayObject.
// Used for named captures.
class FixedArray : public HeapObject {
 public:
  // Stores |value| as dense element |index| of the underlying object.
  inline void set(uint32_t index, Object value) {
    inner()->setDenseElement(index, value.value());
  }
  // Re-wraps the value of |object| as a FixedArray; no type check is made.
  inline static FixedArray cast(Object object) {
    FixedArray f;
    f.setValue(object.value());
    return f;
  }
  // The wrapped value viewed as a js::NativeObject.
  js::NativeObject* inner() {
    return &value().toObject().as<js::NativeObject>();
  }
};

/*
 * Conceptually, ByteArrayData is a variable-size structure. To
 * implement this in a C++-approved way, we allocate a struct
 * containing the 32-bit length field, followed by additional memory
 * for the data. To access the data, we get a pointer to the next byte
 * after the length field and cast it to the correct type.
 */
inline uint8_t* ByteArrayData::data() {
  static_assert(alignof(uint8_t) <= alignof(ByteArrayData),
                "The trailing data must be aligned to start immediately "
                "after the header with no padding.");
  // |this + 1| is the first byte past the header struct.
  ByteArrayData* immediatelyAfter = this + 1;
  return reinterpret_cast<uint8_t*>(immediatelyAfter);
}

// The trailing data viewed as an array of T. T's alignment must not exceed
// the header's, which is checked at compile time and asserted at run time.
template <typename T>
T* ByteArrayData::typedData() {
  static_assert(alignof(T) <= alignof(ByteArrayData));
  MOZ_ASSERT(uintptr_t(data()) % alignof(T) == 0);
  return reinterpret_cast<T*>(data());
}

// Reads element |index| of the data viewed as T; the index is bounds-checked
// by assertion only.
template <typename T>
T ByteArrayData::getTyped(uint32_t index) {
  MOZ_ASSERT(index < length() / sizeof(T));
  return typedData<T>()[index];
}

// Writes element |index| of the data viewed as T; the index is bounds-checked
// by assertion only.
template <typename T>
void ByteArrayData::setTyped(uint32_t index, T value) {
  MOZ_ASSERT(index < length() / sizeof(T));
  typedData<T>()[index] = value;
}

// A fixed-size array of bytes.
class ByteArray : public HeapObject {
 protected:
  // The wrapped value is a private pointer to the ByteArrayData.
  ByteArrayData* inner() const {
    return static_cast<ByteArrayData*>(value().toPrivate());
  }
  friend bool IsByteArray(Object obj);

 public:
  // Ownership transfer of the underlying data; defined outside this header.
  PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
  PseudoHandle<ByteArrayData> maybeTakeOwnership(Isolate* isolate);

  // Element access, length and data pointer all forward to ByteArrayData.
  uint8_t get(uint32_t index) { return inner()->get(index); }
  void set(uint32_t index, uint8_t val) { inner()->set(index, val); }

  uint32_t length() const { return inner()->length(); }
  uint8_t* begin() { return inner()->data(); }

  // Re-wraps the value of |object| as a ByteArray; no type check is made.
  static ByteArray cast(Object object) {
    ByteArray b;
    b.setValue(object.value());
    return b;
  }

  friend class SMRegExpMacroAssembler;
};

// A byte array that can be trusted to not contain malicious data.
// See https://issues.chromium.org/issues/40069826.
class TrustedByteArray : public ByteArray {
 public:
  // Re-wraps the value of |object| as a TrustedByteArray; no check is made.
  static TrustedByteArray cast(Object object) {
    TrustedByteArray b;
    b.setValue(object.value());
    return b;
  }
};

// This is only used in assertions. In debug builds, we put a magic value
// in the header of each ByteArrayData, and assert here that it matches.
// The function itself always returns true.
inline bool IsByteArray(Object obj) {
  MOZ_ASSERT(ByteArray::cast(obj).inner()->magic() ==
             ByteArrayData::ExpectedMagic);
  return true;
}

// This is a convenience class used in V8 for treating a ByteArray as an array
// of fixed-size integers. This version supports integral types up to 32 bits.
768 template <typename T> 769 class FixedIntegerArray : public ByteArray { 770 static_assert(alignof(T) <= alignof(ByteArrayData)); 771 static_assert(std::is_integral<T>::value); 772 773 public: 774 static Handle<FixedIntegerArray<T>> New(Isolate* isolate, uint32_t length); 775 776 T get(uint32_t index) { return inner()->template getTyped<T>(index); }; 777 void set(uint32_t index, T value) { 778 inner()->template setTyped<T>(index, value); 779 } 780 781 static FixedIntegerArray<T> cast(Object object) { 782 FixedIntegerArray<T> f; 783 f.setValue(object.value()); 784 return f; 785 } 786 }; 787 788 using FixedUInt16Array = FixedIntegerArray<uint16_t>; 789 790 // Like Handles in SM, V8 handles are references to marked pointers. 791 // Unlike SM, where Rooted pointers are created individually on the 792 // stack, the target of a V8 handle lives in an arena on the isolate 793 // (~= JSContext). Whenever a Handle is created, a new "root" is 794 // created at the end of the arena. 795 // 796 // HandleScopes are used to manage the lifetimes of these handles. A 797 // HandleScope lives on the stack and stores the size of the arena at 798 // the time of its creation. When the function returns and the 799 // HandleScope is destroyed, the arena is truncated to its previous 800 // size, clearing all roots that were created since the creation of 801 // the HandleScope. 802 // 803 // In some cases, objects that are GC-allocated in V8 are not in SM. 804 // In particular, irregexp allocates ByteArrays during code generation 805 // to store lookup tables. This does not play nicely with the SM 806 // macroassembler's requirement that no GC allocations take place 807 // while it is on the stack. To work around this, this shim layer also 808 // provides the ability to create pseudo-handles, which are not 809 // managed by the GC but provide the same API to irregexp. The "root" 810 // of a pseudohandle is a unique pointer living in a second arena. 
If 811 // the allocated object should outlive the HandleScope, it must be 812 // manually moved out of the arena using maybeTakeOwnership. 813 // (If maybeTakeOwnership is called multiple times, it will return 814 // a null pointer on subsequent calls.) 815 816 class MOZ_STACK_CLASS HandleScope { 817 public: 818 HandleScope(Isolate* isolate); 819 ~HandleScope(); 820 821 private: 822 size_t level_ = 0; 823 size_t non_gc_level_ = 0; 824 Isolate* isolate_; 825 826 friend class Isolate; 827 }; 828 829 // Origin: 830 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171 831 template <typename T> 832 class MOZ_NONHEAP_CLASS Handle { 833 public: 834 Handle() : location_(nullptr) {} 835 Handle(T object, Isolate* isolate); 836 Handle(const JS::Value& value, Isolate* isolate); 837 838 // Constructor for handling automatic up casting. 839 template <typename S, 840 typename = std::enable_if_t<std::is_convertible_v<S*, T*>>> 841 inline Handle(Handle<S> handle) : location_(handle.location_) {} 842 843 inline bool is_null() const { return location_ == nullptr; } 844 845 inline T operator*() const { return T::cast(Object(*location_)); }; 846 847 // {ObjectRef} is returned by {Handle::operator->}. It should never be stored 848 // anywhere or used in any other code; no one should ever have to spell out 849 // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by 850 // "operator-> chaining". Returning the address of the field is valid because 851 // this object's lifetime only ends at the end of the full statement. 
852 // Origin: 853 // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105 854 class MOZ_TEMPORARY_CLASS ObjectRef { 855 public: 856 T* operator->() { return &object_; } 857 858 private: 859 friend class Handle; 860 explicit ObjectRef(T object) : object_(object) {} 861 862 T object_; 863 }; 864 inline ObjectRef operator->() const { return ObjectRef{**this}; } 865 866 static Handle<T> fromHandleValue(JS::HandleValue handle) { 867 return Handle(handle.address()); 868 } 869 870 private: 871 Handle(const JS::Value* location) : location_(location) {} 872 873 template <typename> 874 friend class Handle; 875 template <typename> 876 friend class MaybeHandle; 877 878 const JS::Value* location_; 879 }; 880 881 // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle 882 // into a Handle requires checking that it does not point to nullptr. This 883 // ensures nullptr checks before use. 884 // 885 // Also note that Handles do not provide default equality comparison or hashing 886 // operators on purpose. Such operators would be misleading, because intended 887 // semantics is ambiguous between Handle location and object identity. 888 // Origin: 889 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78 890 template <typename T> 891 class MOZ_NONHEAP_CLASS MaybeHandle final { 892 public: 893 MaybeHandle() : location_(nullptr) {} 894 895 // Constructor for handling automatic up casting from Handle. 896 // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected. 897 template <typename S, 898 typename = std::enable_if_t<std::is_convertible_v<S*, T*>>> 899 MaybeHandle(Handle<S> handle) : location_(handle.location_) {} 900 901 inline Handle<T> ToHandleChecked() const { 902 MOZ_RELEASE_ASSERT(location_); 903 return Handle<T>(location_); 904 } 905 906 // Convert to a Handle with a type that can be upcasted to. 
907 template <typename S> 908 inline bool ToHandle(Handle<S>* out) const { 909 if (location_) { 910 *out = Handle<T>(location_); 911 return true; 912 } else { 913 *out = Handle<T>(); 914 return false; 915 } 916 } 917 918 private: 919 JS::Value* location_; 920 }; 921 922 // From v8/src/handles/handles-inl.h 923 924 template <typename T> 925 inline Handle<T> handle(T object, Isolate* isolate) { 926 return Handle<T>(object, isolate); 927 } 928 929 // V8 is migrating to a conservative stack scanning approach. When that 930 // is enabled, a DirectHandle points directly at the V8 heap, and an 931 // IndirectHandle is an unmigrated old-style Handle with a layer of 932 // indirection. When disabled (which matches our implementation) the two 933 // types are the same. See: 934 // https://github.com/v8/v8/blob/887ec63c43e23c4fefba1c52d4525654bdc71e5b/src/common/globals.h#L1000-L1012 935 template <typename T> 936 using DirectHandle = Handle<T>; 937 938 template <typename T> 939 using IndirectHandle = Handle<T>; 940 941 template <typename T> 942 using MaybeDirectHandle = MaybeHandle<T>; 943 944 // RAII Guard classes 945 946 using DisallowGarbageCollection = JS::AutoAssertNoGC; 947 948 // V8 uses this inside DisallowGarbageCollection regions to turn 949 // allocation back on before throwing a stack overflow exception or 950 // handling interrupts. AutoSuppressGC is sufficient for the former 951 // case, but not for the latter: handling interrupts can execute 952 // arbitrary script code, and V8 jumps through some scary hoops to 953 // "manually relocate unhandlified references" afterwards. To keep 954 // things sane, we don't try to handle interrupts while regex code is 955 // still on the stack. Instead, we return EXCEPTION and handle 956 // interrupts in the caller. (See RegExpShared::execute.) 
957 958 class AllowGarbageCollection { 959 public: 960 AllowGarbageCollection() {} 961 }; 962 963 // Origin: 964 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474 965 class String : public HeapObject { 966 private: 967 JSString* str() const { return value().toString(); } 968 969 public: 970 String() = default; 971 String(JSString* str) { setValue(JS::StringValue(str)); } 972 973 operator JSString*() const { return str(); } 974 975 // Max char codes. 976 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar; 977 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar; 978 static const int kMaxUtf16CodeUnit = 0xffff; 979 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit; 980 static const base::uc32 kMaxCodePoint = 0x10ffff; 981 982 MOZ_ALWAYS_INLINE int length() const { return str()->length(); } 983 bool IsFlat() { return str()->isLinear(); }; 984 985 // Origin: 986 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152 987 class FlatContent { 988 public: 989 FlatContent(JSLinearString* string, const DisallowGarbageCollection& no_gc) 990 : string_(string), no_gc_(no_gc) {} 991 inline bool IsOneByte() const { return string_->hasLatin1Chars(); } 992 inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); } 993 994 base::Vector<const uint8_t> ToOneByteVector() const { 995 MOZ_ASSERT(IsOneByte()); 996 return base::Vector<const uint8_t>(string_->latin1Chars(no_gc_), 997 string_->length()); 998 } 999 base::Vector<const base::uc16> ToUC16Vector() const { 1000 MOZ_ASSERT(IsTwoByte()); 1001 return base::Vector<const base::uc16>(string_->twoByteChars(no_gc_), 1002 string_->length()); 1003 } 1004 void UnsafeDisableChecksumVerification() { 1005 // Intentional no-op. See the comment for AllowGarbageCollection above. 
1006 } 1007 1008 private: 1009 const JSLinearString* string_; 1010 const JS::AutoAssertNoGC& no_gc_; 1011 }; 1012 FlatContent GetFlatContent(const DisallowGarbageCollection& no_gc) { 1013 MOZ_ASSERT(IsFlat()); 1014 return FlatContent(&str()->asLinear(), no_gc); 1015 } 1016 1017 static Handle<String> Flatten(Isolate* isolate, Handle<String> string); 1018 1019 inline static String cast(Object object) { 1020 String s; 1021 MOZ_ASSERT(object.value().isString()); 1022 s.setValue(object.value()); 1023 return s; 1024 } 1025 1026 inline static bool IsOneByteRepresentationUnderneath(String string) { 1027 return string.str()->hasLatin1Chars(); 1028 } 1029 inline bool IsOneByteRepresentation() const { 1030 return str()->hasLatin1Chars(); 1031 } 1032 1033 std::unique_ptr<char[]> ToCString(); 1034 1035 template <typename Char> 1036 base::Vector<const Char> GetCharVector( 1037 const DisallowGarbageCollection& no_gc); 1038 1039 friend class RegExpUtils; 1040 }; 1041 1042 template <> 1043 inline base::Vector<const uint8_t> String::GetCharVector( 1044 const DisallowGarbageCollection& no_gc) { 1045 String::FlatContent flat = GetFlatContent(no_gc); 1046 MOZ_ASSERT(flat.IsOneByte()); 1047 return flat.ToOneByteVector(); 1048 } 1049 1050 template <> 1051 inline base::Vector<const base::uc16> String::GetCharVector( 1052 const DisallowGarbageCollection& no_gc) { 1053 String::FlatContent flat = GetFlatContent(no_gc); 1054 MOZ_ASSERT(flat.IsTwoByte()); 1055 return flat.ToUC16Vector(); 1056 } 1057 1058 using RegExpFlags = JS::RegExpFlags; 1059 using RegExpFlag = JS::RegExpFlags::Flag; 1060 1061 class JSRegExp { 1062 public: 1063 // Each capture (including the match itself) needs two registers. 
1064 static constexpr int RegistersForCaptureCount(int count) { 1065 return (count + 1) * 2; 1066 } 1067 1068 static RegExpFlags AsRegExpFlags(RegExpFlags flags) { return flags; } 1069 static RegExpFlags AsJSRegExpFlags(RegExpFlags flags) { return flags; } 1070 1071 static Handle<String> StringFromFlags(Isolate* isolate, RegExpFlags flags); 1072 1073 // ****************************** 1074 // Static constants 1075 // ****************************** 1076 1077 static constexpr int kMaxCaptures = (1 << 15) - 1; 1078 1079 static constexpr int kNoBacktrackLimit = 0; 1080 }; 1081 1082 class IrRegExpData : public HeapObject { 1083 public: 1084 IrRegExpData() : HeapObject() {} 1085 IrRegExpData(js::RegExpShared* re) { setValue(JS::PrivateGCThingValue(re)); } 1086 1087 // ****************************************************** 1088 // Methods that are called from inside the implementation 1089 // ****************************************************** 1090 void TierUpTick() { inner()->tierUpTick(); } 1091 1092 Tagged<TrustedByteArray> bytecode(bool is_latin1) const { 1093 return TrustedByteArray::cast( 1094 Object(JS::PrivateValue(inner()->getByteCode(is_latin1)))); 1095 } 1096 1097 // TODO: should we expose this? 1098 uint32_t backtrack_limit() const { return 0; } 1099 1100 static IrRegExpData cast(Object object) { 1101 IrRegExpData regexp; 1102 js::gc::Cell* regexpShared = object.value().toGCThing(); 1103 MOZ_ASSERT(regexpShared->is<js::RegExpShared>()); 1104 regexp.setValue(JS::PrivateGCThingValue(regexpShared)); 1105 return regexp; 1106 } 1107 1108 inline uint32_t max_register_count() const { 1109 return inner()->getMaxRegisters(); 1110 } 1111 1112 RegExpFlags flags() const { return inner()->getFlags(); } 1113 1114 size_t capture_count() const { 1115 // Subtract 1 because pairCount includes the implicit global capture. 
1116 return inner()->pairCount() - 1; 1117 } 1118 1119 private: 1120 js::RegExpShared* inner() const { 1121 return value().toGCThing()->as<js::RegExpShared>(); 1122 } 1123 }; 1124 1125 inline bool IsUnicode(RegExpFlags flags) { return flags.unicode(); } 1126 inline bool IsGlobal(RegExpFlags flags) { return flags.global(); } 1127 inline bool IsIgnoreCase(RegExpFlags flags) { return flags.ignoreCase(); } 1128 inline bool IsMultiline(RegExpFlags flags) { return flags.multiline(); } 1129 inline bool IsDotAll(RegExpFlags flags) { return flags.dotAll(); } 1130 inline bool IsSticky(RegExpFlags flags) { return flags.sticky(); } 1131 inline bool IsUnicodeSets(RegExpFlags flags) { return flags.unicodeSets(); } 1132 inline bool IsEitherUnicode(RegExpFlags flags) { 1133 return flags.unicode() || flags.unicodeSets(); 1134 } 1135 1136 inline std::optional<RegExpFlag> TryRegExpFlagFromChar(char c) { 1137 RegExpFlag flag; 1138 1139 // The parser only calls this after verifying that it's a supported flag. 
1140 if (JS::MaybeParseRegExpFlag(c, &flag)) { 1141 return flag; 1142 } 1143 1144 return std::optional<RegExpFlag>{}; 1145 } 1146 1147 inline bool operator==(const RegExpFlags& lhs, const int& rhs) { 1148 return lhs.value() == rhs; 1149 } 1150 inline bool operator!=(const RegExpFlags& lhs, const int& rhs) { 1151 return !(lhs == rhs); 1152 } 1153 1154 class Histogram { 1155 public: 1156 inline void AddSample(int sample) {} 1157 }; 1158 1159 class Counters { 1160 public: 1161 Histogram* regexp_backtracks() { return ®exp_backtracks_; } 1162 1163 private: 1164 Histogram regexp_backtracks_; 1165 }; 1166 1167 enum class AllocationType : uint8_t { 1168 kYoung, // Allocate in the nursery 1169 kOld, // Allocate in the tenured heap 1170 }; 1171 1172 using StackGuard = Isolate; 1173 using Factory = Isolate; 1174 1175 class Isolate { 1176 public: 1177 Isolate(JSContext* cx) : cx_(cx) {} 1178 ~Isolate(); 1179 bool init(); 1180 1181 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const; 1182 1183 //********** Isolate code **********// 1184 RegExpStack* regexp_stack() const { return regexpStack_; } 1185 1186 // This is called from inside no-GC code. Instead of suppressing GC 1187 // to allocate the error, we return false from Execute and call 1188 // ReportOverRecursed in the caller. 
1189 void StackOverflow() {} 1190 1191 #ifndef V8_INTL_SUPPORT 1192 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() { 1193 return &jsregexp_uncanonicalize_; 1194 } 1195 unibrow::Mapping<unibrow::Ecma262Canonicalize>* 1196 regexp_macro_assembler_canonicalize() { 1197 return ®exp_macro_assembler_canonicalize_; 1198 } 1199 unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange() { 1200 return &jsregexp_canonrange_; 1201 } 1202 1203 private: 1204 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_; 1205 unibrow::Mapping<unibrow::Ecma262Canonicalize> 1206 regexp_macro_assembler_canonicalize_; 1207 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_; 1208 #endif // !V8_INTL_SUPPORT 1209 1210 public: 1211 // An empty stub for telemetry we don't support 1212 void IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code) {} 1213 1214 Counters* counters() { return &counters_; } 1215 1216 //********** Factory code **********// 1217 inline Factory* factory() { return this; } 1218 1219 Handle<ByteArray> NewByteArray( 1220 int length, AllocationType allocation = AllocationType::kYoung); 1221 1222 Handle<TrustedByteArray> NewTrustedByteArray( 1223 int length, AllocationType allocation = AllocationType::kYoung); 1224 1225 // Allocates a fixed array initialized with undefined values. 1226 Handle<FixedArray> NewFixedArray(int length); 1227 1228 template <typename T> 1229 Handle<FixedIntegerArray<T>> NewFixedIntegerArray(uint32_t length); 1230 1231 template <typename Char> 1232 Handle<String> InternalizeString(const base::Vector<const Char>& str); 1233 1234 //********** Stack guard code **********// 1235 inline StackGuard* stack_guard() { return this; } 1236 1237 uintptr_t real_climit() { return cx_->stackLimit(JS::StackForSystemCode); } 1238 1239 // This is called from inside no-GC code. 
V8 runs the interrupt 1240 // inside the no-GC code and then "manually relocates unhandlified 1241 // references" afterwards. We just return a magic value and let the 1242 // caller handle interrupts. 1243 Object HandleInterrupts() { 1244 return Object(JS::MagicValue(JS_INTERRUPT_REGEXP)); 1245 } 1246 1247 JSContext* cx() const { return cx_; } 1248 1249 void trace(JSTracer* trc); 1250 1251 //********** Handle code **********// 1252 1253 JS::Value* getHandleLocation(const JS::Value& value); 1254 1255 private: 1256 mozilla::SegmentedVector<JS::Value, 256> handleArena_; 1257 mozilla::SegmentedVector<PseudoHandle<void>, 256> uniquePtrArena_; 1258 1259 void* allocatePseudoHandle(size_t bytes); 1260 1261 public: 1262 template <typename T> 1263 PseudoHandle<T> takeOwnership(void* ptr); 1264 template <typename T> 1265 PseudoHandle<T> maybeTakeOwnership(void* ptr); 1266 1267 uint32_t liveHandles() const { return handleArena_.Length(); } 1268 uint32_t livePseudoHandles() const { return uniquePtrArena_.Length(); } 1269 1270 private: 1271 void openHandleScope(HandleScope& scope) { 1272 scope.level_ = handleArena_.Length(); 1273 scope.non_gc_level_ = uniquePtrArena_.Length(); 1274 } 1275 void closeHandleScope(size_t prevLevel, size_t prevUniqueLevel) { 1276 size_t currLevel = handleArena_.Length(); 1277 handleArena_.PopLastN(currLevel - prevLevel); 1278 1279 size_t currUniqueLevel = uniquePtrArena_.Length(); 1280 uniquePtrArena_.PopLastN(currUniqueLevel - prevUniqueLevel); 1281 } 1282 friend class HandleScope; 1283 1284 JSContext* cx_; 1285 RegExpStack* regexpStack_{}; 1286 Counters counters_{}; 1287 #ifdef DEBUG 1288 public: 1289 uint32_t shouldSimulateInterrupt_ = 0; 1290 #endif 1291 }; 1292 1293 // Origin: 1294 // https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931 1295 class StackLimitCheck { 1296 public: 1297 StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {} 1298 1299 // Use this to check for stack-overflows in 
C++ code. 1300 bool HasOverflowed() { 1301 js::AutoCheckRecursionLimit recursion(cx_); 1302 bool overflowed = !recursion.checkDontReport(cx_); 1303 if (overflowed && js::SupportDifferentialTesting()) { 1304 // We don't report overrecursion here, but we throw an exception later 1305 // and this still affects differential testing. Mimic ReportOverRecursed 1306 // (the fuzzers check for this particular string). 1307 fprintf(stderr, "ReportOverRecursed called\n"); 1308 } 1309 return overflowed; 1310 } 1311 1312 // Use this to check for interrupt request in C++ code. 1313 bool InterruptRequested() { 1314 return cx_->hasPendingInterrupt(js::InterruptReason::CallbackUrgent); 1315 } 1316 1317 // Use this to check for stack-overflow when entering runtime from JS code. 1318 bool JsHasOverflowed() { 1319 js::AutoCheckRecursionLimit recursion(cx_); 1320 return !recursion.checkDontReport(cx_); 1321 } 1322 1323 private: 1324 JSContext* cx_; 1325 }; 1326 1327 class ExternalReference { 1328 public: 1329 static const void* TopOfRegexpStack(Isolate* isolate); 1330 static size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf, 1331 RegExpStack* regexpStack); 1332 }; 1333 1334 class Code : public HeapObject { 1335 public: 1336 uint8_t* raw_instruction_start() { return inner()->raw(); } 1337 1338 static Code cast(Object object) { 1339 Code c; 1340 js::gc::Cell* jitCode = object.value().toGCThing(); 1341 MOZ_ASSERT(jitCode->is<js::jit::JitCode>()); 1342 c.setValue(JS::PrivateGCThingValue(jitCode)); 1343 return c; 1344 } 1345 js::jit::JitCode* inner() { 1346 return value().toGCThing()->as<js::jit::JitCode>(); 1347 } 1348 }; 1349 1350 // Only used in function signature of functions we don't implement 1351 // (NativeRegExpMacroAssembler::CheckStackGuardState) 1352 class InstructionStream {}; 1353 1354 // Only used in the definition of RegExpGlobalExecRunner, which we don't use. 
1355 class RegExpResultVectorScope {}; 1356 1357 // Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h 1358 class Label { 1359 public: 1360 Label() : inner_(js::jit::Label()) {} 1361 1362 js::jit::Label* inner() { return &inner_; } 1363 1364 void Unuse() { inner_.reset(); } 1365 1366 bool is_linked() { return inner_.used(); } 1367 bool is_bound() { return inner_.bound(); } 1368 bool is_unused() { return !inner_.used() && !inner_.bound(); } 1369 1370 int pos() { return inner_.offset(); } 1371 void link_to(int pos) { inner_.use(pos); } 1372 void bind_to(int pos) { inner_.bind(pos); } 1373 1374 private: 1375 js::jit::Label inner_; 1376 js::jit::CodeOffset patchOffset_; 1377 1378 friend class SMRegExpMacroAssembler; 1379 }; 1380 1381 class RegExpUtils { 1382 public: 1383 static uint64_t AdvanceStringIndex(Tagged<String> string, uint64_t index, 1384 bool unicode); 1385 }; 1386 1387 #define v8_flags js::jit::JitOptions 1388 1389 #define V8_USE_COMPUTED_GOTO 1 1390 #define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER 1391 1392 } // namespace internal 1393 } // namespace v8 1394 1395 namespace V8 { 1396 1397 inline void FatalProcessOutOfMemory(v8::internal::Isolate* isolate, 1398 const char* msg) { 1399 js::AutoEnterOOMUnsafeRegion oomUnsafe; 1400 oomUnsafe.crash(msg); 1401 } 1402 1403 } // namespace V8 1404 1405 #endif // RegexpShim_h