tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RegExpShim.h (45307B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // Copyright 2019 the V8 project authors. All rights reserved.
      8 // Use of this source code is governed by a BSD-style license that can be
      9 // found in the LICENSE file.
     10 
     11 #ifndef RegexpShim_h
     12 #define RegexpShim_h
     13 
     14 #include "mozilla/Assertions.h"
     15 #include "mozilla/Attributes.h"
     16 #include "mozilla/MathAlgorithms.h"
     17 #include "mozilla/Maybe.h"
     18 #include "mozilla/SegmentedVector.h"
     19 #include "mozilla/Sprintf.h"
     20 
     21 #include <algorithm>
     22 #include <cctype>
     23 #include <optional>
     24 
     25 #include "irregexp/RegExpTypes.h"
     26 #include "irregexp/util/FlagsShim.h"
     27 #include "irregexp/util/VectorShim.h"
     28 #include "irregexp/util/ZoneShim.h"
     29 #include "jit/JitCode.h"
     30 #include "jit/Label.h"
     31 #include "jit/shared/Assembler-shared.h"
     32 #include "js/friend/StackLimits.h"  // js::AutoCheckRecursionLimit
     33 #include "js/RegExpFlags.h"
     34 #include "js/Value.h"
     35 #include "threading/ExclusiveData.h"
     36 #include "util/DifferentialTesting.h"
     37 #include "vm/JSContext.h"
     38 #include "vm/MutexIDs.h"
     39 #include "vm/NativeObject.h"
     40 #include "vm/RegExpShared.h"
     41 
     42 // Forward declaration of classes
     43 namespace v8 {
     44 namespace internal {
     45 
     46 class Heap;
     47 class Isolate;
     48 class RegExpMatchInfo;
     49 class RegExpStack;
     50 
     51 template <typename T>
     52 class Handle;
     53 
     54 }  // namespace internal
     55 }  // namespace v8
     56 
// Attribute/keyword shims: V8 spells these as macros; here they expand to the
// corresponding standard C++ attribute or keyword (or to nothing).
#define V8_WARN_UNUSED_RESULT [[nodiscard]]
#define V8_EXPORT_PRIVATE
#define V8_FALLTHROUGH [[fallthrough]]
#define V8_NODISCARD [[nodiscard]]
#define V8_NOEXCEPT noexcept

// Fatal-error shims: each of these is expressed with MOZ_CRASH.
#define FATAL(x) MOZ_CRASH(x)
#define UNREACHABLE() MOZ_CRASH("unreachable code")
#define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
// The stringified expression doubles as the static_assert message.
#define STATIC_ASSERT(exp) static_assert(exp, #exp)

// V8's DCHECK family is expressed with MOZ_ASSERT / MOZ_ASSERT_IF.
#define DCHECK MOZ_ASSERT
#define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
#define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
#define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
#define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
#define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
#define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
#define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
#define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
#define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
// V8's CHECK family is expressed with MOZ_RELEASE_ASSERT.
#define CHECK MOZ_RELEASE_ASSERT
#define CHECK_EQ(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) == (rhs))
#define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs))
#define CHECK_GE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) >= (rhs))
// "lhs implies rhs", written out as !lhs || rhs.
#define CHECK_IMPLIES(lhs, rhs) MOZ_RELEASE_ASSERT(!(lhs) || (rhs))
#define CONSTEXPR_DCHECK MOZ_ASSERT

// These assertions are necessary to preserve the soundness of the V8
// sandbox. In V8, they are debug-only if the sandbox is off, but release
// asserts if the sandbox is turned on. We don't have an equivalent sandbox,
// so they can be debug checks for now.
#define SBXCHECK MOZ_ASSERT
#define SBXCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
#define SBXCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
#define SBXCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
#define SBXCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
#define SBXCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
#define SBXCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
     96 
// V8's MemCopy is plain memcpy here.
#define MemCopy memcpy

// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
// printf length modifiers for ptrdiff_t values (hex, signed decimal, unsigned
// decimal). ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
#ifdef _MSC_VER
#  define V8PRIxPTRDIFF "Ix"
#  define V8PRIdPTRDIFF "Id"
#  define V8PRIuPTRDIFF "Iu"
#else
#  define V8PRIxPTRDIFF "tx"
#  define V8PRIdPTRDIFF "td"
#  define V8PRIuPTRDIFF "tu"
#endif

// V8's arraysize(x) is spelled std::size(x) here.
#define arraysize std::size
    113 
// Explicitly declares the copy-assignment operator as deleted.
// The expansion has no trailing semicolon; the use site supplies it.
#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete

// Explicitly declares the copy constructor and copy-assignment operator as
// deleted. This also suppresses the implicit move constructor and implicit
// move-assignment operator, but they can still be defined manually.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  DISALLOW_ASSIGN(TypeName)

// Explicitly declares all implicit constructors as deleted, namely the
// default constructor, copy constructor and operator= functions.
// This is especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName() = delete;                           \
  DISALLOW_COPY_AND_ASSIGN(TypeName)
    130 
    131 namespace v8 {
    132 
    133 // Origin:
    134 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
    135 template <typename T, typename U>
    136 constexpr inline bool IsAligned(T value, U alignment) {
    137  return (value & (alignment - 1)) == 0;
    138 }
    139 
    140 using Address = uintptr_t;
    141 static const Address kNullAddress = 0;
    142 
    143 inline uintptr_t GetCurrentStackPosition() {
    144  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
    145 }
    146 
    147 namespace base {
    148 
// Latin1/UTF-16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// Code units in UTF-16 are 16 bits wide.
using uc16 = char16_t;  // One UTF-16 code unit.
using uc32 = uint32_t;  // Wide enough for a 21-bit code point.

// Size in bytes of one uc16 code unit.
constexpr int kUC16Size = sizeof(base::uc16);
    156 
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
// The USE(x, ...) template is used to silence C++ compiler warnings
// issued for (yet) unused variables (typically parameters).
// The arguments are guaranteed to be evaluated from left to right.
//
// Use is constructible from a value of any type and does nothing with it.
struct Use {
  template <typename T>
  Use(T&&) {}  // NOLINT(runtime/explicit)
};
// Expands to a block that brace-initializes an array of Use from the
// arguments and then discards the array.
#define USE(...)                                                   \
  do {                                                             \
    ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
    (void)unused_tmp_array_for_use_macro;                          \
  } while (false)
    171 
    172 // Origin:
    173 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
    174 // saturated_cast<> is analogous to static_cast<> for numeric types, except
    175 // that the specified numeric conversion will saturate rather than overflow or
    176 // underflow.
    177 template <typename Dst, typename Src>
    178 inline Dst saturated_cast(Src value);
    179 
    180 // These are the only specializations that are needed for regexp code.
    181 // Instead of pulling in dozens of lines of template goo
    182 // to derive it, I used the implementation from uint8_clamped in
    183 // ArrayBufferObject.h.
    184 template <>
    185 inline uint8_t saturated_cast<uint8_t, int>(int x) {
    186  return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0;
    187 }
    188 template <>
    189 inline uint8_t saturated_cast<uint8_t, uint32_t>(uint32_t x) {
    190  return (x < 255) ? uint8_t(x) : 255;
    191 }
    192 
    193 // Origin:
    194 // https://github.com/v8/v8/blob/fc088cdaccadede84886eee881e67af9db53669a/src/base/bounds.h#L14-L28
    195 // Checks if value is in range [lower_limit, higher_limit] using a single
    196 // branch.
    197 template <typename T, typename U>
    198 inline constexpr bool IsInRange(T value, U lower_limit, U higher_limit) {
    199  using unsigned_T = typename std::make_unsigned<T>::type;
    200  // Use static_cast to support enum classes.
    201  return static_cast<unsigned_T>(static_cast<unsigned_T>(value) -
    202                                 static_cast<unsigned_T>(lower_limit)) <=
    203         static_cast<unsigned_T>(static_cast<unsigned_T>(higher_limit) -
    204                                 static_cast<unsigned_T>(lower_limit));
    205 }
    206 
    207 #define LAZY_INSTANCE_INITIALIZER \
    208  {                               \
    209  }
    210 
    211 template <typename T>
    212 class LazyInstanceImpl {
    213 public:
    214  LazyInstanceImpl() : value_(js::mutexid::IrregexpLazyStatic) {}
    215 
    216  const T* Pointer() {
    217    auto val = value_.lock();
    218    if (val->isNothing()) {
    219      val->emplace();
    220    }
    221    return val->ptr();
    222  }
    223 
    224 private:
    225  js::ExclusiveData<mozilla::Maybe<T>> value_;
    226 };
    227 
// Exposes LazyInstanceImpl<T> under the nested name `type`, so code written
// as LazyInstance<T>::type resolves to the shim implementation.
template <typename T>
class LazyInstance {
 public:
  using type = LazyInstanceImpl<T>;
};
    233 
    234 // Origin:
    235 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
    236 // Returns the value (0 .. 15) of a hexadecimal character c.
    237 // If c is not a legal hexadecimal character, returns a value < 0.
    238 // Used in regexp-parser.cc
    239 inline int HexValue(base::uc32 c) {
    240  c -= '0';
    241  if (static_cast<unsigned>(c) <= 9) return c;
    242  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
    243  if (static_cast<unsigned>(c) <= 5) return c + 10;
    244  return -1;
    245 }
    246 
// Folds |aArgs| into the running hash |aHash| by forwarding everything to
// mozilla::AddToHash, and returns the result. The result must not be
// discarded.
template <typename... Args>
[[nodiscard]] uint32_t hash_combine(uint32_t aHash, Args... aArgs) {
  return mozilla::AddToHash(aHash, aArgs...);
}
    251 
    252 namespace bits {
    253 
// Forwards to mozilla::CountTrailingZeroes64.
// NOTE(review): check that helper's precondition for a zero argument.
inline uint64_t CountTrailingZeros(uint64_t value) {
  return mozilla::CountTrailingZeroes64(value);
}

// Forwards to mozilla::RoundUpPow2. Despite the "32" in the name, the
// parameter and result are size_t.
inline size_t RoundUpToPowerOfTwo32(size_t value) {
  return mozilla::RoundUpPow2(value);
}
    261 
    262 template <typename T>
    263 constexpr bool IsPowerOfTwo(T value) {
    264  return value > 0 && (value & (value - 1)) == 0;
    265 }
    266 
    267 }  // namespace bits
    268 }  // namespace base
    269 
    270 namespace unibrow {
    271 
    272 using uchar = unsigned int;
    273 
    274 // Origin:
    275 // https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
    276 class Latin1 {
    277 public:
    278  static const base::uc16 kMaxChar = 0xff;
    279 
    280  // Convert the character to Latin-1 case equivalent if possible.
    281  static inline base::uc16 TryConvertToLatin1(base::uc16 c) {
    282    // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
    283    // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
    284    if (c == 0x039C || c == 0x03BC) {
    285      return 0xB5;
    286    }
    287    // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
    288    // Y WITH DIAERESIS".
    289    if (c == 0x0178) {
    290      return 0xFF;
    291    }
    292    return c;
    293  }
    294 };
    295 
// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
// UTF-16 surrogate helpers. Every function forwards to the js::unicode
// function named in its body.
class Utf16 {
 public:
  static inline bool IsLeadSurrogate(int code) {
    return js::unicode::IsLeadSurrogate(code);
  }
  static inline bool IsTrailSurrogate(int code) {
    return js::unicode::IsTrailSurrogate(code);
  }
  // Lead / trail code units for |char_code|.
  static inline base::uc16 LeadSurrogate(uint32_t char_code) {
    return js::unicode::LeadSurrogate(char_code);
  }
  static inline base::uc16 TrailSurrogate(uint32_t char_code) {
    return js::unicode::TrailSurrogate(char_code);
  }
  // Decodes a lead/trail pair through js::unicode::UTF16Decode.
  static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
    return js::unicode::UTF16Decode(lead, trail);
  }
  static const uchar kMaxNonSurrogateCharCode = 0xffff;
};
    317 
    318 #ifndef V8_INTL_SUPPORT
    319 
    320 // A cache used in case conversion.  It caches the value for characters
    321 // that either have no mapping or map to a single character independent
    322 // of context.  Characters that map to more than one character or that
    323 // map differently depending on context are always looked up.
    324 // Origin:
    325 // https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
    326 template <class T, int size = 256>
    327 class Mapping {
    328 public:
    329  inline Mapping() = default;
    330  inline int get(uchar c, uchar n, uchar* result) {
    331    CacheEntry entry = entries_[c & kMask];
    332    if (entry.code_point_ == c) {
    333      if (entry.offset_ == 0) {
    334        return 0;
    335      } else {
    336        result[0] = c + entry.offset_;
    337        return 1;
    338      }
    339    } else {
    340      return CalculateValue(c, n, result);
    341    }
    342  }
    343 
    344 private:
    345  int CalculateValue(uchar c, uchar n, uchar* result) {
    346    bool allow_caching = true;
    347    int length = T::Convert(c, n, result, &allow_caching);
    348    if (allow_caching) {
    349      if (length == 1) {
    350        entries_[c & kMask] = CacheEntry(c, result[0] - c);
    351        return 1;
    352      } else {
    353        entries_[c & kMask] = CacheEntry(c, 0);
    354        return 0;
    355      }
    356    } else {
    357      return length;
    358    }
    359  }
    360 
    361  struct CacheEntry {
    362    inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
    363    inline CacheEntry(uchar code_point, signed offset)
    364        : code_point_(code_point), offset_(offset) {}
    365    uchar code_point_;
    366    signed offset_;
    367    static const int kNoChar = (1 << 21) - 1;
    368  };
    369  static const int kSize = size;
    370  static const int kMask = kSize - 1;
    371  CacheEntry entries_[kSize];
    372 };
    373 
// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
// Converter policies. Each declares a static Convert with the same
// signature as the T::Convert call made by Mapping<T>::CalculateValue
// (presumably they are used as Mapping's T parameter), plus a kMaxWidth
// constant. Convert is defined elsewhere.
struct Ecma262Canonicalize {
  static const int kMaxWidth = 1;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct Ecma262UnCanonicalize {
  static const int kMaxWidth = 4;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct CanonicalizationRange {
  static const int kMaxWidth = 1;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
    388 
    389 #endif  // !V8_INTL_SUPPORT
    390 
// Character-class predicate; Is() is defined elsewhere.
// NOTE(review): presumably reports whether |c| is a Unicode letter.
struct Letter {
  static bool Is(uchar c);
};
    394 
    395 }  // namespace unibrow
    396 
    397 namespace internal {
    398 
// V8's PRINTF_FORMAT is MOZ_FORMAT_PRINTF here. In the declarations below the
// two numbers match the 1-based positions of the format string and of the
// first variadic argument.
#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
// printf-style output helpers (defined elsewhere); the second overload takes
// the destination stream explicitly.
void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);
    402 
// Superclass for classes that only have static member functions.
// When DEBUG is defined the default constructor is deleted, so neither
// AllStatic nor a subclass that relies on it can be default-constructed.
// Without DEBUG this is an empty class with no declared constructors.
class AllStatic {
#ifdef DEBUG
 public:
  AllStatic() = delete;
#endif
};
    411 
    412 // Superclass for classes managed with new and delete.
    413 // In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
    414 // Compare:
    415 // https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
    416 class Malloced {
    417 public:
    418  static void* operator new(size_t size) {
    419    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    420    void* result = js_malloc(size);
    421    if (!result) {
    422      oomUnsafe.crash("Irregexp Malloced shim");
    423    }
    424    return result;
    425  }
    426  static void operator delete(void* p) { js_free(p); }
    427 };
    428 
// Byte-count multipliers.
constexpr int32_t KB = 1024;
constexpr int32_t MB = 1024 * 1024;

// V8's int limits are the JSVAL int32 limits here.
#define kMaxInt JSVAL_INT_MAX
#define kMinInt JSVAL_INT_MIN
// Size of a pointer in bytes.
constexpr int kSystemPointerSize = sizeof(void*);

// The largest integer n such that n and n + 1 are both exactly
// representable as a Number value.  ES6 section 20.1.2.6
constexpr uint64_t kMaxSafeIntegerUint64 = (uint64_t{1} << 53) - 1;
constexpr double kMaxSafeInteger = static_cast<double>(kMaxSafeIntegerUint64);

// Bit and byte sizes of fixed-width integer types.
constexpr int kBitsPerByte = 8;
constexpr int kBitsPerByteLog2 = 3;
constexpr int kUInt16Size = sizeof(uint16_t);
constexpr int kInt32Size = sizeof(int32_t);
constexpr int kUInt32Size = sizeof(uint32_t);
constexpr int kInt64Size = sizeof(int64_t);

// Largest value representable in 16 unsigned bits (0xFFFF).
constexpr int kMaxUInt16 = (1 << 16) - 1;
    449 
    450 inline constexpr bool IsDecimalDigit(base::uc32 c) {
    451  return c >= '0' && c <= '9';
    452 }
    453 
    454 inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
    455 
    456 inline bool is_uint24(int64_t val) { return (val >> 24) == 0; }
    457 inline bool is_int24(int64_t val) {
    458  int64_t limit = int64_t(1) << 23;
    459  return (-limit <= val) && (val < limit);
    460 }
    461 
// Identifier classification, forwarded to js::unicode. The uc32 argument is
// converted to char32_t to select the code-point overload.
inline bool IsIdentifierStart(base::uc32 c) {
  return js::unicode::IsIdentifierStart(char32_t(c));
}
inline bool IsIdentifierPart(base::uc32 c) {
  return js::unicode::IsIdentifierPart(char32_t(c));
}
    468 
// Wrappers to disambiguate char16_t and uc16.
// Each is an explicit single-field wrapper with its own operator<< overload
// (declared after these structs).
struct AsUC16 {
  explicit AsUC16(char16_t v) : value(v) {}
  char16_t value;
};

// Note that the stored value is a signed int32_t, whereas base::uc32 is
// uint32_t.
struct AsUC32 {
  explicit AsUC32(int32_t v) : value(v) {}
  int32_t value;
};
    479 
    480 std::ostream& operator<<(std::ostream& os, const AsUC16& c);
    481 std::ostream& operator<<(std::ostream& os, const AsUC32& c);
    482 
// This class is used for the output of trace-regexp-parser.  V8 has
// an elaborate implementation to ensure that the output gets to the
// right place, even on Android. We just need something that will
// print output (ideally to stderr, to match the rest of our tracing
// code). This is an empty wrapper that will convert itself to
// std::cerr when used.
class StdoutStream {
 public:
  // Conversion to the underlying stream; defined elsewhere.
  operator std::ostream&() const;
  // Streams |t| and returns a std::ostream&, so further << calls in a chain
  // apply to that stream. Defined elsewhere.
  template <typename T>
  std::ostream& operator<<(T t);
};
    495 
// Reuse existing Maybe implementation
using mozilla::Maybe;

// V8-style constructor for a populated Maybe: wraps a copy of |value| via
// mozilla::Some.
template <typename T>
Maybe<T> Just(const T& value) {
  return mozilla::Some(value);
}

// V8-style constructor for an empty Maybe. T cannot be deduced, so callers
// write Nothing<T>(); the result is a mozilla::Nothing regardless of T.
template <typename T>
mozilla::Nothing Nothing() {
  return mozilla::Nothing();
}

// A uniquely-owned T released through JS::FreePolicy.
template <typename T>
using PseudoHandle = mozilla::UniquePtr<T, JS::FreePolicy>;
    511 
    512 // Compare 8bit/16bit chars to 8bit/16bit chars.
    513 // Used indirectly by regexp-interpreter.cc
    514 // Taken from: https://github.com/v8/v8/blob/master/src/utils/utils.h
    515 template <typename lchar, typename rchar>
    516 inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
    517                                size_t chars) {
    518  const lchar* limit = lhs + chars;
    519  if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
    520    // memcmp compares byte-by-byte, yielding wrong results for two-byte
    521    // strings on little-endian systems.
    522    return memcmp(lhs, rhs, chars);
    523  }
    524  while (lhs < limit) {
    525    int r = static_cast<int>(*lhs) - static_cast<int>(*rhs);
    526    if (r != 0) return r;
    527    ++lhs;
    528    ++rhs;
    529  }
    530  return 0;
    531 }
    532 template <typename lchar, typename rchar>
    533 inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
    534  DCHECK_LE(sizeof(lchar), 2);
    535  DCHECK_LE(sizeof(rchar), 2);
    536  if (sizeof(lchar) == 1) {
    537    if (sizeof(rchar) == 1) {
    538      return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
    539                                  reinterpret_cast<const uint8_t*>(rhs), chars);
    540    } else {
    541      return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
    542                                  reinterpret_cast<const char16_t*>(rhs),
    543                                  chars);
    544    }
    545  } else {
    546    if (sizeof(rchar) == 1) {
    547      return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
    548                                  reinterpret_cast<const uint8_t*>(rhs), chars);
    549    } else {
    550      return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
    551                                  reinterpret_cast<const char16_t*>(rhs),
    552                                  chars);
    553    }
    554  }
    555 }
    556 
    557 // Compare 8bit/16bit chars to 8bit/16bit chars.
    558 template <typename lchar, typename rchar>
    559 inline bool CompareCharsEqualUnsigned(const lchar* lhs, const rchar* rhs,
    560                                      size_t chars) {
    561  STATIC_ASSERT(std::is_unsigned<lchar>::value);
    562  STATIC_ASSERT(std::is_unsigned<rchar>::value);
    563  if (sizeof(*lhs) == sizeof(*rhs)) {
    564    // memcmp compares byte-by-byte, but for equality it doesn't matter whether
    565    // two-byte char comparison is little- or big-endian.
    566    return memcmp(lhs, rhs, chars * sizeof(*lhs)) == 0;
    567  }
    568  for (const lchar* limit = lhs + chars; lhs < limit; ++lhs, ++rhs) {
    569    if (*lhs != *rhs) return false;
    570  }
    571  return true;
    572 }
    573 
    574 template <typename lchar, typename rchar>
    575 inline bool CompareCharsEqual(const lchar* lhs, const rchar* rhs,
    576                              size_t chars) {
    577  using ulchar = typename std::make_unsigned<lchar>::type;
    578  using urchar = typename std::make_unsigned<rchar>::type;
    579  return CompareCharsEqualUnsigned(reinterpret_cast<const ulchar*>(lhs),
    580                                   reinterpret_cast<const urchar*>(rhs), chars);
    581 }
    582 
// V8::Object ~= JS::Value
// Stores the raw 64-bit representation of a JS::Value.
class Object {
 public:
  // The default object constructor in V8 stores a nullptr,
  // which has its low bit clear and is interpreted as Smi(0).
  // Here the default is the raw bits of Int32Value(0).
  constexpr Object() : asBits_(JS::Int32Value(0).asRawBits()) {}

  // Implicit conversion from JS::Value: copies its raw bits.
  Object(const JS::Value& value) : asBits_(value.asRawBits()) {}

  // This constructor is only used in an unused implementation of
  // IsCharacterInRangeArray in regexp-macro-assembler.cc.
  // Calling it crashes.
  Object(uintptr_t raw) : asBits_(raw) { MOZ_CRASH("unused"); }

  // Rebuilds the JS::Value from the stored bits.
  JS::Value value() const { return JS::Value::fromRawBits(asBits_); }

  // Identity cast; subclasses provide their own cast() returning their type.
  inline static Object cast(Object object) { return object; }

 protected:
  // Replaces the stored bits; used by subclasses to build instances.
  void setValue(const JS::Value& val) { asBits_ = val.asRawBits(); }
  uint64_t asBits_;
} JS_HAZ_GC_POINTER;
    604 
// Used in regexp-interpreter.cc to check the return value of
// isolate->stack_guard()->HandleInterrupts(). We want to handle
// interrupts in the caller, so we return a magic value from
// HandleInterrupts and check for it here.
// Returns true when |obj| holds the JS_INTERRUPT_REGEXP magic value; the
// Isolate argument is unused.
inline bool IsExceptionHole(Object obj, Isolate*) {
  return obj.value().isMagic(JS_INTERRUPT_REGEXP);
}
    612 
    613 class Smi : public Object {
    614 public:
    615  static Smi FromInt(int32_t value) {
    616    Smi smi;
    617    smi.setValue(JS::Int32Value(value));
    618    return smi;
    619  }
    620  static inline int32_t ToInt(const Object object) {
    621    return object.value().toInt32();
    622  }
    623 };
    624 
    625 // V8::HeapObject ~= GC thing
    626 class HeapObject : public Object {
    627 public:
    628  inline static HeapObject cast(Object object) {
    629    HeapObject h;
    630    h.setValue(object.value());
    631    return h;
    632  }
    633 };
    634 
// V8's values use low-bit tagging. If the LSB is 0, it's a small
// integer. If the LSB is 1, it's a pointer to some GC thing. In V8,
// this wrapper class is used to represent a pointer that has the low
// bit set, or a small integer that has been shifted left by one
// bit. We don't use the same tagging system, so all we need is a
// transparent wrapper that automatically converts to/from the wrapped
// type.
template <typename T>
class Tagged {
 public:
  // Leaves value_ default-initialized.
  Tagged() {}
  // Implicit conversions from T (copy and move).
  MOZ_IMPLICIT Tagged(const T& value) : value_(value) {}
  MOZ_IMPLICIT Tagged(T&& value) : value_(std::move(value)) {}

  // Member access on the wrapped value.
  T* operator->() { return &value_; }
  // Implicit conversion back to T; returns a copy.
  constexpr operator T() const { return value_; }

 private:
  T value_;
};
    655 
// Adapted from v8/src/objects/casting.h

// Converts Tagged<From> to Tagged<To> by calling To::cast on the wrapped
// value (via Tagged's implicit conversion to From).
template <typename To, typename From>
inline Tagged<To> UncheckedCast(Tagged<From> value) {
  return Tagged<To>(To::cast(value));
}

// Same conversion starting from an untagged value: wraps |value| in Tagged
// and forwards to UncheckedCast.
template <typename To, typename From>
inline Tagged<To> Cast(const From& value) {
  return UncheckedCast<To>(Tagged(value));
}
    667 
    668 // A fixed-size array with Objects (aka Values) as element types.
    669 // Implemented using the dense elements of an ArrayObject.
    670 // Used for named captures.
    671 class FixedArray : public HeapObject {
    672 public:
    673  inline void set(uint32_t index, Object value) {
    674    inner()->setDenseElement(index, value.value());
    675  }
    676  inline static FixedArray cast(Object object) {
    677    FixedArray f;
    678    f.setValue(object.value());
    679    return f;
    680  }
    681  js::NativeObject* inner() {
    682    return &value().toObject().as<js::NativeObject>();
    683  }
    684 };
    685 
    686 /*
    687 * Conceptually, ByteArrayData is a variable-size structure. To
    688 * implement this in a C++-approved way, we allocate a struct
    689 * containing the 32-bit length field, followed by additional memory
    690 * for the data. To access the data, we get a pointer to the next byte
    691 * after the length field and cast it to the correct type.
    692 */
    693 inline uint8_t* ByteArrayData::data() {
    694  static_assert(alignof(uint8_t) <= alignof(ByteArrayData),
    695                "The trailing data must be aligned to start immediately "
    696                "after the header with no padding.");
    697  ByteArrayData* immediatelyAfter = this + 1;
    698  return reinterpret_cast<uint8_t*>(immediatelyAfter);
    699 }
    700 
    701 template <typename T>
    702 T* ByteArrayData::typedData() {
    703  static_assert(alignof(T) <= alignof(ByteArrayData));
    704  MOZ_ASSERT(uintptr_t(data()) % alignof(T) == 0);
    705  return reinterpret_cast<T*>(data());
    706 }
    707 
    708 template <typename T>
    709 T ByteArrayData::getTyped(uint32_t index) {
    710  MOZ_ASSERT(index < length() / sizeof(T));
    711  return typedData<T>()[index];
    712 }
    713 
    714 template <typename T>
    715 void ByteArrayData::setTyped(uint32_t index, T value) {
    716  MOZ_ASSERT(index < length() / sizeof(T));
    717  typedData<T>()[index] = value;
    718 }
    719 
// A fixed-size array of bytes.
// The wrapped value is a private-pointer Value referring to a ByteArrayData;
// all accessors forward to that object.
class ByteArray : public HeapObject {
 protected:
  // The ByteArrayData referenced by the wrapped private pointer.
  ByteArrayData* inner() const {
    return static_cast<ByteArrayData*>(value().toPrivate());
  }
  friend bool IsByteArray(Object obj);

 public:
  // Ownership transfer of the underlying data; defined elsewhere.
  PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
  PseudoHandle<ByteArrayData> maybeTakeOwnership(Isolate* isolate);

  // Byte-wise element access.
  uint8_t get(uint32_t index) { return inner()->get(index); }
  void set(uint32_t index, uint8_t val) { inner()->set(index, val); }

  uint32_t length() const { return inner()->length(); }
  // Pointer to the first payload byte.
  uint8_t* begin() { return inner()->data(); }

  // Re-wraps the value held by |object| as a ByteArray. No type check is
  // performed.
  static ByteArray cast(Object object) {
    ByteArray b;
    b.setValue(object.value());
    return b;
  }

  friend class SMRegExpMacroAssembler;
};
    746 
    747 // A byte array that can be trusted to not contain malicious data.
    748 // See https://issues.chromium.org/issues/40069826.
    749 class TrustedByteArray : public ByteArray {
    750 public:
    751  static TrustedByteArray cast(Object object) {
    752    TrustedByteArray b;
    753    b.setValue(object.value());
    754    return b;
    755  }
    756 };
    757 
// This is only used in assertions. In debug builds, we put a magic value
// in the header of each ByteArrayData, and assert here that it matches.
// The function itself always returns true; a mismatch is reported by the
// MOZ_ASSERT.
inline bool IsByteArray(Object obj) {
  MOZ_ASSERT(ByteArray::cast(obj).inner()->magic() ==
             ByteArrayData::ExpectedMagic);
  return true;
}
    765 
    766 // This is a convenience class used in V8 for treating a ByteArray as an array
    767 // of fixed-size integers. This version supports integral types up to 32 bits.
    768 template <typename T>
    769 class FixedIntegerArray : public ByteArray {
    770  static_assert(alignof(T) <= alignof(ByteArrayData));
    771  static_assert(std::is_integral<T>::value);
    772 
    773 public:
    774  static Handle<FixedIntegerArray<T>> New(Isolate* isolate, uint32_t length);
    775 
    776  T get(uint32_t index) { return inner()->template getTyped<T>(index); };
    777  void set(uint32_t index, T value) {
    778    inner()->template setTyped<T>(index, value);
    779  }
    780 
    781  static FixedIntegerArray<T> cast(Object object) {
    782    FixedIntegerArray<T> f;
    783    f.setValue(object.value());
    784    return f;
    785  }
    786 };
    787 
    788 using FixedUInt16Array = FixedIntegerArray<uint16_t>;
    789 
    790 // Like Handles in SM, V8 handles are references to marked pointers.
    791 // Unlike SM, where Rooted pointers are created individually on the
    792 // stack, the target of a V8 handle lives in an arena on the isolate
    793 // (~= JSContext). Whenever a Handle is created, a new "root" is
    794 // created at the end of the arena.
    795 //
    796 // HandleScopes are used to manage the lifetimes of these handles.  A
    797 // HandleScope lives on the stack and stores the size of the arena at
    798 // the time of its creation. When the function returns and the
    799 // HandleScope is destroyed, the arena is truncated to its previous
    800 // size, clearing all roots that were created since the creation of
    801 // the HandleScope.
    802 //
    803 // In some cases, objects that are GC-allocated in V8 are not in SM.
    804 // In particular, irregexp allocates ByteArrays during code generation
    805 // to store lookup tables. This does not play nicely with the SM
    806 // macroassembler's requirement that no GC allocations take place
    807 // while it is on the stack. To work around this, this shim layer also
    808 // provides the ability to create pseudo-handles, which are not
    809 // managed by the GC but provide the same API to irregexp. The "root"
    810 // of a pseudo-handle is a unique pointer living in a second arena. If
    811 // the allocated object should outlive the HandleScope, it must be
    812 // manually moved out of the arena using maybeTakeOwnership.
    813 // (If maybeTakeOwnership is called multiple times, it will return
    814 // a null pointer on subsequent calls.)
    815 
    816 class MOZ_STACK_CLASS HandleScope {
    817 public:
    818  HandleScope(Isolate* isolate);
    819  ~HandleScope();
    820 
    821 private:
    822  size_t level_ = 0;
    823  size_t non_gc_level_ = 0;
    824  Isolate* isolate_;
    825 
    826  friend class Isolate;
    827 };
    828 
    829 // Origin:
    830 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
    831 template <typename T>
    832 class MOZ_NONHEAP_CLASS Handle {
    833 public:
    834  Handle() : location_(nullptr) {}
    835  Handle(T object, Isolate* isolate);
    836  Handle(const JS::Value& value, Isolate* isolate);
    837 
    838  // Constructor for handling automatic up casting.
    839  template <typename S,
    840            typename = std::enable_if_t<std::is_convertible_v<S*, T*>>>
    841  inline Handle(Handle<S> handle) : location_(handle.location_) {}
    842 
    843  inline bool is_null() const { return location_ == nullptr; }
    844 
    845  inline T operator*() const { return T::cast(Object(*location_)); };
    846 
    847  // {ObjectRef} is returned by {Handle::operator->}. It should never be stored
    848  // anywhere or used in any other code; no one should ever have to spell out
    849  // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by
    850  // "operator-> chaining". Returning the address of the field is valid because
    851  // this object's lifetime only ends at the end of the full statement.
    852  // Origin:
    853  // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105
    854  class MOZ_TEMPORARY_CLASS ObjectRef {
    855   public:
    856    T* operator->() { return &object_; }
    857 
    858   private:
    859    friend class Handle;
    860    explicit ObjectRef(T object) : object_(object) {}
    861 
    862    T object_;
    863  };
    864  inline ObjectRef operator->() const { return ObjectRef{**this}; }
    865 
    866  static Handle<T> fromHandleValue(JS::HandleValue handle) {
    867    return Handle(handle.address());
    868  }
    869 
    870 private:
    871  Handle(const JS::Value* location) : location_(location) {}
    872 
    873  template <typename>
    874  friend class Handle;
    875  template <typename>
    876  friend class MaybeHandle;
    877 
    878  const JS::Value* location_;
    879 };
    880 
    881 // A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
    882 // into a Handle requires checking that it does not point to nullptr.  This
    883 // ensures nullptr checks before use.
    884 //
    885 // Also note that Handles do not provide default equality comparison or hashing
    886 // operators on purpose. Such operators would be misleading, because intended
    887 // semantics is ambiguous between Handle location and object identity.
    888 // Origin:
    889 // https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
    890 template <typename T>
    891 class MOZ_NONHEAP_CLASS MaybeHandle final {
    892 public:
    893  MaybeHandle() : location_(nullptr) {}
    894 
    895  // Constructor for handling automatic up casting from Handle.
    896  // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
    897  template <typename S,
    898            typename = std::enable_if_t<std::is_convertible_v<S*, T*>>>
    899  MaybeHandle(Handle<S> handle) : location_(handle.location_) {}
    900 
    901  inline Handle<T> ToHandleChecked() const {
    902    MOZ_RELEASE_ASSERT(location_);
    903    return Handle<T>(location_);
    904  }
    905 
    906  // Convert to a Handle with a type that can be upcasted to.
    907  template <typename S>
    908  inline bool ToHandle(Handle<S>* out) const {
    909    if (location_) {
    910      *out = Handle<T>(location_);
    911      return true;
    912    } else {
    913      *out = Handle<T>();
    914      return false;
    915    }
    916  }
    917 
    918 private:
    919  JS::Value* location_;
    920 };
    921 
    922 // From v8/src/handles/handles-inl.h
    923 
    924 template <typename T>
    925 inline Handle<T> handle(T object, Isolate* isolate) {
    926  return Handle<T>(object, isolate);
    927 }
    928 
    929 // V8 is migrating to a conservative stack scanning approach. When that
    930 // is enabled, a DirectHandle points directly at the V8 heap, and an
    931 // IndirectHandle is an unmigrated old-style Handle with a layer of
    932 // indirection. When disabled (which matches our implementation) the two
    933 // types are the same. See:
    934 // https://github.com/v8/v8/blob/887ec63c43e23c4fefba1c52d4525654bdc71e5b/src/common/globals.h#L1000-L1012
    935 template <typename T>
    936 using DirectHandle = Handle<T>;
    937 
    938 template <typename T>
    939 using IndirectHandle = Handle<T>;
    940 
    941 template <typename T>
    942 using MaybeDirectHandle = MaybeHandle<T>;
    943 
    944 // RAII Guard classes
    945 
    946 using DisallowGarbageCollection = JS::AutoAssertNoGC;
    947 
    948 // V8 uses this inside DisallowGarbageCollection regions to turn
    949 // allocation back on before throwing a stack overflow exception or
    950 // handling interrupts. AutoSuppressGC is sufficient for the former
    951 // case, but not for the latter: handling interrupts can execute
    952 // arbitrary script code, and V8 jumps through some scary hoops to
    953 // "manually relocate unhandlified references" afterwards. To keep
    954 // things sane, we don't try to handle interrupts while regex code is
    955 // still on the stack. Instead, we return EXCEPTION and handle
    956 // interrupts in the caller. (See RegExpShared::execute.)
    957 
    // Empty stand-in for the V8 guard of the same name: constructing one has no
    // effect.
    // NOTE(review): the constructor is user-provided rather than defaulted,
    // presumably so that otherwise-unused guard variables at call sites do not
    // trigger unused-variable warnings. Confirm before changing it.
    class AllowGarbageCollection {
     public:
      AllowGarbageCollection() {}
    };
    962 
    963 // Origin:
    964 // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
    965 class String : public HeapObject {
    966 private:
    967  JSString* str() const { return value().toString(); }
    968 
    969 public:
    970  String() = default;
    971  String(JSString* str) { setValue(JS::StringValue(str)); }
    972 
    973  operator JSString*() const { return str(); }
    974 
    975  // Max char codes.
    976  static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
    977  static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
    978  static const int kMaxUtf16CodeUnit = 0xffff;
    979  static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
    980  static const base::uc32 kMaxCodePoint = 0x10ffff;
    981 
    982  MOZ_ALWAYS_INLINE int length() const { return str()->length(); }
    983  bool IsFlat() { return str()->isLinear(); };
    984 
    985  // Origin:
    986  // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
    987  class FlatContent {
    988   public:
    989    FlatContent(JSLinearString* string, const DisallowGarbageCollection& no_gc)
    990        : string_(string), no_gc_(no_gc) {}
    991    inline bool IsOneByte() const { return string_->hasLatin1Chars(); }
    992    inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); }
    993 
    994    base::Vector<const uint8_t> ToOneByteVector() const {
    995      MOZ_ASSERT(IsOneByte());
    996      return base::Vector<const uint8_t>(string_->latin1Chars(no_gc_),
    997                                         string_->length());
    998    }
    999    base::Vector<const base::uc16> ToUC16Vector() const {
   1000      MOZ_ASSERT(IsTwoByte());
   1001      return base::Vector<const base::uc16>(string_->twoByteChars(no_gc_),
   1002                                            string_->length());
   1003    }
   1004    void UnsafeDisableChecksumVerification() {
   1005      // Intentional no-op. See the comment for AllowGarbageCollection above.
   1006    }
   1007 
   1008   private:
   1009    const JSLinearString* string_;
   1010    const JS::AutoAssertNoGC& no_gc_;
   1011  };
   1012  FlatContent GetFlatContent(const DisallowGarbageCollection& no_gc) {
   1013    MOZ_ASSERT(IsFlat());
   1014    return FlatContent(&str()->asLinear(), no_gc);
   1015  }
   1016 
   1017  static Handle<String> Flatten(Isolate* isolate, Handle<String> string);
   1018 
   1019  inline static String cast(Object object) {
   1020    String s;
   1021    MOZ_ASSERT(object.value().isString());
   1022    s.setValue(object.value());
   1023    return s;
   1024  }
   1025 
   1026  inline static bool IsOneByteRepresentationUnderneath(String string) {
   1027    return string.str()->hasLatin1Chars();
   1028  }
   1029  inline bool IsOneByteRepresentation() const {
   1030    return str()->hasLatin1Chars();
   1031  }
   1032 
   1033  std::unique_ptr<char[]> ToCString();
   1034 
   1035  template <typename Char>
   1036  base::Vector<const Char> GetCharVector(
   1037      const DisallowGarbageCollection& no_gc);
   1038 
   1039  friend class RegExpUtils;
   1040 };
   1041 
   1042 template <>
   1043 inline base::Vector<const uint8_t> String::GetCharVector(
   1044    const DisallowGarbageCollection& no_gc) {
   1045  String::FlatContent flat = GetFlatContent(no_gc);
   1046  MOZ_ASSERT(flat.IsOneByte());
   1047  return flat.ToOneByteVector();
   1048 }
   1049 
   1050 template <>
   1051 inline base::Vector<const base::uc16> String::GetCharVector(
   1052    const DisallowGarbageCollection& no_gc) {
   1053  String::FlatContent flat = GetFlatContent(no_gc);
   1054  MOZ_ASSERT(flat.IsTwoByte());
   1055  return flat.ToUC16Vector();
   1056 }
   1057 
   1058 using RegExpFlags = JS::RegExpFlags;
   1059 using RegExpFlag = JS::RegExpFlags::Flag;
   1060 
   1061 class JSRegExp {
   1062 public:
   1063  // Each capture (including the match itself) needs two registers.
   1064  static constexpr int RegistersForCaptureCount(int count) {
   1065    return (count + 1) * 2;
   1066  }
   1067 
   1068  static RegExpFlags AsRegExpFlags(RegExpFlags flags) { return flags; }
   1069  static RegExpFlags AsJSRegExpFlags(RegExpFlags flags) { return flags; }
   1070 
   1071  static Handle<String> StringFromFlags(Isolate* isolate, RegExpFlags flags);
   1072 
   1073  // ******************************
   1074  // Static constants
   1075  // ******************************
   1076 
   1077  static constexpr int kMaxCaptures = (1 << 15) - 1;
   1078 
   1079  static constexpr int kNoBacktrackLimit = 0;
   1080 };
   1081 
   1082 class IrRegExpData : public HeapObject {
   1083 public:
   1084  IrRegExpData() : HeapObject() {}
   1085  IrRegExpData(js::RegExpShared* re) { setValue(JS::PrivateGCThingValue(re)); }
   1086 
   1087  // ******************************************************
   1088  // Methods that are called from inside the implementation
   1089  // ******************************************************
   1090  void TierUpTick() { inner()->tierUpTick(); }
   1091 
   1092  Tagged<TrustedByteArray> bytecode(bool is_latin1) const {
   1093    return TrustedByteArray::cast(
   1094        Object(JS::PrivateValue(inner()->getByteCode(is_latin1))));
   1095  }
   1096 
   1097  // TODO: should we expose this?
   1098  uint32_t backtrack_limit() const { return 0; }
   1099 
   1100  static IrRegExpData cast(Object object) {
   1101    IrRegExpData regexp;
   1102    js::gc::Cell* regexpShared = object.value().toGCThing();
   1103    MOZ_ASSERT(regexpShared->is<js::RegExpShared>());
   1104    regexp.setValue(JS::PrivateGCThingValue(regexpShared));
   1105    return regexp;
   1106  }
   1107 
   1108  inline uint32_t max_register_count() const {
   1109    return inner()->getMaxRegisters();
   1110  }
   1111 
   1112  RegExpFlags flags() const { return inner()->getFlags(); }
   1113 
   1114  size_t capture_count() const {
   1115    // Subtract 1 because pairCount includes the implicit global capture.
   1116    return inner()->pairCount() - 1;
   1117  }
   1118 
   1119 private:
   1120  js::RegExpShared* inner() const {
   1121    return value().toGCThing()->as<js::RegExpShared>();
   1122  }
   1123 };
   1124 
   1125 inline bool IsUnicode(RegExpFlags flags) { return flags.unicode(); }
   1126 inline bool IsGlobal(RegExpFlags flags) { return flags.global(); }
   1127 inline bool IsIgnoreCase(RegExpFlags flags) { return flags.ignoreCase(); }
   1128 inline bool IsMultiline(RegExpFlags flags) { return flags.multiline(); }
   1129 inline bool IsDotAll(RegExpFlags flags) { return flags.dotAll(); }
   1130 inline bool IsSticky(RegExpFlags flags) { return flags.sticky(); }
   1131 inline bool IsUnicodeSets(RegExpFlags flags) { return flags.unicodeSets(); }
   1132 inline bool IsEitherUnicode(RegExpFlags flags) {
   1133  return flags.unicode() || flags.unicodeSets();
   1134 }
   1135 
   1136 inline std::optional<RegExpFlag> TryRegExpFlagFromChar(char c) {
   1137  RegExpFlag flag;
   1138 
   1139  // The parser only calls this after verifying that it's a supported flag.
   1140  if (JS::MaybeParseRegExpFlag(c, &flag)) {
   1141    return flag;
   1142  }
   1143 
   1144  return std::optional<RegExpFlag>{};
   1145 }
   1146 
   1147 inline bool operator==(const RegExpFlags& lhs, const int& rhs) {
   1148  return lhs.value() == rhs;
   1149 }
   1150 inline bool operator!=(const RegExpFlags& lhs, const int& rhs) {
   1151  return !(lhs == rhs);
   1152 }
   1153 
   // Stub histogram: samples are accepted and discarded.
   class Histogram {
    public:
     inline void AddSample(int sample) {}
   };

   // Owns the single histogram that is exposed here and hands out a pointer to
   // it. The same pointer is returned on every call.
   class Counters {
    public:
     Histogram* regexp_backtracks() { return &regexp_backtracks_; }

    private:
     Histogram regexp_backtracks_;
   };
   1166 
   // Where an allocation should be placed. Stored in a single byte.
   enum class AllocationType : uint8_t {
     // Allocate in the nursery.
     kYoung,
     // Allocate in the tenured heap.
     kOld,
   };
   1171 
   1172 using StackGuard = Isolate;
   1173 using Factory = Isolate;
   1174 
   1175 class Isolate {
   1176 public:
   1177  Isolate(JSContext* cx) : cx_(cx) {}
   1178  ~Isolate();
   1179  bool init();
   1180 
   1181  size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
   1182 
   1183  //********** Isolate code **********//
   1184  RegExpStack* regexp_stack() const { return regexpStack_; }
   1185 
   1186  // This is called from inside no-GC code. Instead of suppressing GC
   1187  // to allocate the error, we return false from Execute and call
   1188  // ReportOverRecursed in the caller.
   1189  void StackOverflow() {}
   1190 
   1191 #ifndef V8_INTL_SUPPORT
   1192  unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
   1193    return &jsregexp_uncanonicalize_;
   1194  }
   1195  unibrow::Mapping<unibrow::Ecma262Canonicalize>*
   1196  regexp_macro_assembler_canonicalize() {
   1197    return &regexp_macro_assembler_canonicalize_;
   1198  }
   1199  unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange() {
   1200    return &jsregexp_canonrange_;
   1201  }
   1202 
   1203 private:
   1204  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_;
   1205  unibrow::Mapping<unibrow::Ecma262Canonicalize>
   1206      regexp_macro_assembler_canonicalize_;
   1207  unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_;
   1208 #endif  // !V8_INTL_SUPPORT
   1209 
   1210 public:
   1211  // An empty stub for telemetry we don't support
   1212  void IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code) {}
   1213 
   1214  Counters* counters() { return &counters_; }
   1215 
   1216  //********** Factory code **********//
   1217  inline Factory* factory() { return this; }
   1218 
   1219  Handle<ByteArray> NewByteArray(
   1220      int length, AllocationType allocation = AllocationType::kYoung);
   1221 
   1222  Handle<TrustedByteArray> NewTrustedByteArray(
   1223      int length, AllocationType allocation = AllocationType::kYoung);
   1224 
   1225  // Allocates a fixed array initialized with undefined values.
   1226  Handle<FixedArray> NewFixedArray(int length);
   1227 
   1228  template <typename T>
   1229  Handle<FixedIntegerArray<T>> NewFixedIntegerArray(uint32_t length);
   1230 
   1231  template <typename Char>
   1232  Handle<String> InternalizeString(const base::Vector<const Char>& str);
   1233 
   1234  //********** Stack guard code **********//
   1235  inline StackGuard* stack_guard() { return this; }
   1236 
   1237  uintptr_t real_climit() { return cx_->stackLimit(JS::StackForSystemCode); }
   1238 
   1239  // This is called from inside no-GC code. V8 runs the interrupt
   1240  // inside the no-GC code and then "manually relocates unhandlified
   1241  // references" afterwards. We just return a magic value and let the
   1242  // caller handle interrupts.
   1243  Object HandleInterrupts() {
   1244    return Object(JS::MagicValue(JS_INTERRUPT_REGEXP));
   1245  }
   1246 
   1247  JSContext* cx() const { return cx_; }
   1248 
   1249  void trace(JSTracer* trc);
   1250 
   1251  //********** Handle code **********//
   1252 
   1253  JS::Value* getHandleLocation(const JS::Value& value);
   1254 
   1255 private:
   1256  mozilla::SegmentedVector<JS::Value, 256> handleArena_;
   1257  mozilla::SegmentedVector<PseudoHandle<void>, 256> uniquePtrArena_;
   1258 
   1259  void* allocatePseudoHandle(size_t bytes);
   1260 
   1261 public:
   1262  template <typename T>
   1263  PseudoHandle<T> takeOwnership(void* ptr);
   1264  template <typename T>
   1265  PseudoHandle<T> maybeTakeOwnership(void* ptr);
   1266 
   1267  uint32_t liveHandles() const { return handleArena_.Length(); }
   1268  uint32_t livePseudoHandles() const { return uniquePtrArena_.Length(); }
   1269 
   1270 private:
   1271  void openHandleScope(HandleScope& scope) {
   1272    scope.level_ = handleArena_.Length();
   1273    scope.non_gc_level_ = uniquePtrArena_.Length();
   1274  }
   1275  void closeHandleScope(size_t prevLevel, size_t prevUniqueLevel) {
   1276    size_t currLevel = handleArena_.Length();
   1277    handleArena_.PopLastN(currLevel - prevLevel);
   1278 
   1279    size_t currUniqueLevel = uniquePtrArena_.Length();
   1280    uniquePtrArena_.PopLastN(currUniqueLevel - prevUniqueLevel);
   1281  }
   1282  friend class HandleScope;
   1283 
   1284  JSContext* cx_;
   1285  RegExpStack* regexpStack_{};
   1286  Counters counters_{};
   1287 #ifdef DEBUG
   1288 public:
   1289  uint32_t shouldSimulateInterrupt_ = 0;
   1290 #endif
   1291 };
   1292 
   1293 // Origin:
   1294 // https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
   1295 class StackLimitCheck {
   1296 public:
   1297  StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {}
   1298 
   1299  // Use this to check for stack-overflows in C++ code.
   1300  bool HasOverflowed() {
   1301    js::AutoCheckRecursionLimit recursion(cx_);
   1302    bool overflowed = !recursion.checkDontReport(cx_);
   1303    if (overflowed && js::SupportDifferentialTesting()) {
   1304      // We don't report overrecursion here, but we throw an exception later
   1305      // and this still affects differential testing. Mimic ReportOverRecursed
   1306      // (the fuzzers check for this particular string).
   1307      fprintf(stderr, "ReportOverRecursed called\n");
   1308    }
   1309    return overflowed;
   1310  }
   1311 
   1312  // Use this to check for interrupt request in C++ code.
   1313  bool InterruptRequested() {
   1314    return cx_->hasPendingInterrupt(js::InterruptReason::CallbackUrgent);
   1315  }
   1316 
   1317  // Use this to check for stack-overflow when entering runtime from JS code.
   1318  bool JsHasOverflowed() {
   1319    js::AutoCheckRecursionLimit recursion(cx_);
   1320    return !recursion.checkDontReport(cx_);
   1321  }
   1322 
   1323 private:
   1324  JSContext* cx_;
   1325 };
   1326 
   1327 class ExternalReference {
   1328 public:
   1329  static const void* TopOfRegexpStack(Isolate* isolate);
   1330  static size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf,
   1331                                    RegExpStack* regexpStack);
   1332 };
   1333 
   1334 class Code : public HeapObject {
   1335 public:
   1336  uint8_t* raw_instruction_start() { return inner()->raw(); }
   1337 
   1338  static Code cast(Object object) {
   1339    Code c;
   1340    js::gc::Cell* jitCode = object.value().toGCThing();
   1341    MOZ_ASSERT(jitCode->is<js::jit::JitCode>());
   1342    c.setValue(JS::PrivateGCThingValue(jitCode));
   1343    return c;
   1344  }
   1345  js::jit::JitCode* inner() {
   1346    return value().toGCThing()->as<js::jit::JitCode>();
   1347  }
   1348 };
   1349 
   // Empty placeholder type. It is only used in the signature of functions we
   // don't implement (NativeRegExpMacroAssembler::CheckStackGuardState).
   class InstructionStream {};
   1353 
   // Empty placeholder type. It is only used in the definition of
   // RegExpGlobalExecRunner, which we don't use.
   class RegExpResultVectorScope {};
   1356 
   1357 // Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
   1358 class Label {
   1359 public:
   1360  Label() : inner_(js::jit::Label()) {}
   1361 
   1362  js::jit::Label* inner() { return &inner_; }
   1363 
   1364  void Unuse() { inner_.reset(); }
   1365 
   1366  bool is_linked() { return inner_.used(); }
   1367  bool is_bound() { return inner_.bound(); }
   1368  bool is_unused() { return !inner_.used() && !inner_.bound(); }
   1369 
   1370  int pos() { return inner_.offset(); }
   1371  void link_to(int pos) { inner_.use(pos); }
   1372  void bind_to(int pos) { inner_.bind(pos); }
   1373 
   1374 private:
   1375  js::jit::Label inner_;
   1376  js::jit::CodeOffset patchOffset_;
   1377 
   1378  friend class SMRegExpMacroAssembler;
   1379 };
   1380 
   1381 class RegExpUtils {
   1382 public:
   1383  static uint64_t AdvanceStringIndex(Tagged<String> string, uint64_t index,
   1384                                     bool unicode);
   1385 };
   1386 
   1387 #define v8_flags js::jit::JitOptions
   1388 
   1389 #define V8_USE_COMPUTED_GOTO 1
   1390 #define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
   1391 
   1392 }  // namespace internal
   1393 }  // namespace v8
   1394 
   1395 namespace V8 {
   1396 
   1397 inline void FatalProcessOutOfMemory(v8::internal::Isolate* isolate,
   1398                                    const char* msg) {
   1399  js::AutoEnterOOMUnsafeRegion oomUnsafe;
   1400  oomUnsafe.crash(msg);
   1401 }
   1402 
   1403 }  // namespace V8
   1404 
   1405 #endif  // RegexpShim_h