tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RegExpShim.cpp (10951B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // Copyright 2019 the V8 project authors. All rights reserved.
      8 // Use of this source code is governed by a BSD-style license that can be
      9 // found in the LICENSE file.
     10 
     11 #include "irregexp/RegExpShim.h"
     12 
     13 #include "mozilla/MemoryReporting.h"
     14 
     15 #include <iostream>
     16 
     17 #include "irregexp/imported/regexp-macro-assembler.h"
     18 #include "irregexp/imported/regexp-stack.h"
     19 
     20 #include "vm/NativeObject-inl.h"
     21 
     22 namespace v8 {
     23 namespace internal {
     24 
     25 void PrintF(const char* format, ...) {
     26  va_list arguments;
     27  va_start(arguments, format);
     28  vprintf(format, arguments);
     29  va_end(arguments);
     30 }
     31 
     32 void PrintF(FILE* out, const char* format, ...) {
     33  va_list arguments;
     34  va_start(arguments, format);
     35  vfprintf(out, format, arguments);
     36  va_end(arguments);
     37 }
     38 
     39 StdoutStream::operator std::ostream&() const { return std::cerr; }
     40 
     41 template <typename T>
     42 std::ostream& StdoutStream::operator<<(T t) {
     43  return std::cerr << t;
     44 }
     45 
     46 template std::ostream& StdoutStream::operator<<(char const* c);
     47 
     48 // Origin:
     49 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169
     50 // (This is a hand-simplified version.)
     51 // Writes the given character to the output escaping everything outside
     52 // of printable ASCII range.
     53 std::ostream& operator<<(std::ostream& os, const AsUC16& c) {
     54  base::uc16 v = c.value;
     55  bool isPrint = 0x20 < v && v <= 0x7e;
     56  char buf[10];
     57  const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x";
     58  SprintfLiteral(buf, format, v);
     59  return os << buf;
     60 }
     61 std::ostream& operator<<(std::ostream& os, const AsUC32& c) {
     62  int32_t v = c.value;
     63  if (v <= String::kMaxUtf16CodeUnit) {
     64    return os << AsUC16(v);
     65  }
     66  char buf[13];
     67  SprintfLiteral(buf, "\\u{%06x}", v);
     68  return os << buf;
     69 }
     70 
     71 HandleScope::HandleScope(Isolate* isolate) : isolate_(isolate) {
     72  isolate->openHandleScope(*this);
     73 }
     74 
     75 HandleScope::~HandleScope() {
     76  isolate_->closeHandleScope(level_, non_gc_level_);
     77 }
     78 
     79 template <typename T>
     80 Handle<T>::Handle(T object, Isolate* isolate)
     81    : location_(isolate->getHandleLocation(object.value())) {}
     82 
     83 template Handle<ByteArray>::Handle(ByteArray b, Isolate* isolate);
     84 template Handle<TrustedByteArray>::Handle(TrustedByteArray b, Isolate* isolate);
     85 template Handle<HeapObject>::Handle(const JS::Value& v, Isolate* isolate);
     86 template Handle<IrRegExpData>::Handle(IrRegExpData re, Isolate* isolate);
     87 template Handle<String>::Handle(String s, Isolate* isolate);
     88 
     89 template <typename T>
     90 Handle<T>::Handle(const JS::Value& value, Isolate* isolate)
     91    : location_(isolate->getHandleLocation(value)) {
     92  T::cast(Object(value));  // Assert that value has the correct type.
     93 }
     94 
     95 JS::Value* Isolate::getHandleLocation(const JS::Value& value) {
     96  js::AutoEnterOOMUnsafeRegion oomUnsafe;
     97  if (!handleArena_.Append(value)) {
     98    oomUnsafe.crash("Irregexp handle allocation");
     99  }
    100  return &handleArena_.GetLast();
    101 }
    102 
    103 void* Isolate::allocatePseudoHandle(size_t bytes) {
    104  PseudoHandle<void> ptr;
    105  ptr.reset(js_malloc(bytes));
    106  if (!ptr) {
    107    return nullptr;
    108  }
    109  if (!uniquePtrArena_.Append(std::move(ptr))) {
    110    return nullptr;
    111  }
    112  return uniquePtrArena_.GetLast().get();
    113 }
    114 
    115 template <typename T>
    116 PseudoHandle<T> Isolate::takeOwnership(void* ptr) {
    117  PseudoHandle<T> result = maybeTakeOwnership<T>(ptr);
    118  MOZ_ASSERT(result);
    119  return result;
    120 }
    121 
    122 template <typename T>
    123 PseudoHandle<T> Isolate::maybeTakeOwnership(void* ptr) {
    124  for (auto iter = uniquePtrArena_.IterFromLast(); !iter.Done(); iter.Prev()) {
    125    auto& entry = iter.Get();
    126    if (entry.get() == ptr) {
    127      PseudoHandle<T> result;
    128      result.reset(static_cast<T*>(entry.release()));
    129      return result;
    130    }
    131  }
    132  return PseudoHandle<T>();
    133 }
    134 
    135 PseudoHandle<ByteArrayData> ByteArray::maybeTakeOwnership(Isolate* isolate) {
    136  PseudoHandle<ByteArrayData> result =
    137      isolate->maybeTakeOwnership<ByteArrayData>(value().toPrivate());
    138  setValue(JS::PrivateValue(nullptr));
    139  return result;
    140 }
    141 
    142 PseudoHandle<ByteArrayData> ByteArray::takeOwnership(Isolate* isolate) {
    143  PseudoHandle<ByteArrayData> result = maybeTakeOwnership(isolate);
    144  MOZ_ASSERT(result);
    145  return result;
    146 }
    147 
    148 void Isolate::trace(JSTracer* trc) {
    149  js::gc::AssertRootMarkingPhase(trc);
    150 
    151  for (auto iter = handleArena_.Iter(); !iter.Done(); iter.Next()) {
    152    auto& elem = iter.Get();
    153    JS::GCPolicy<JS::Value>::trace(trc, &elem, "Isolate handle arena");
    154  }
    155 }
    156 
    157 size_t Isolate::sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
    158  size_t size = mallocSizeOf(this);
    159 
    160  size += mallocSizeOf(regexpStack_);
    161  size += ExternalReference::SizeOfExcludingThis(mallocSizeOf, regexpStack_);
    162 
    163  size += handleArena_.SizeOfExcludingThis(mallocSizeOf);
    164  size += uniquePtrArena_.SizeOfExcludingThis(mallocSizeOf);
    165  return size;
    166 }
    167 
    168 /*static*/ Handle<String> String::Flatten(Isolate* isolate,
    169                                          Handle<String> string) {
    170  if (string->IsFlat()) {
    171    return string;
    172  }
    173  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    174  JSLinearString* linear = string->str()->ensureLinear(isolate->cx());
    175  if (!linear) {
    176    oomUnsafe.crash("Irregexp String::Flatten");
    177  }
    178  return Handle<String>(JS::StringValue(linear), isolate);
    179 }
    180 
    181 // This is only used for trace messages printing the source pattern of
    182 // a regular expression. We have to return a unique_ptr, but we don't
    183 // care about the contents, so we return an empty null-terminated string.
    184 std::unique_ptr<char[]> String::ToCString() {
    185  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    186 
    187  std::unique_ptr<char[]> ptr;
    188  ptr.reset(static_cast<char*>(js_malloc(1)));
    189  if (!ptr) {
    190    oomUnsafe.crash("Irregexp String::ToCString");
    191  }
    192  ptr[0] = '\0';
    193 
    194  return ptr;
    195 }
    196 
    197 bool Isolate::init() {
    198  regexpStack_ = js_new<RegExpStack>();
    199  if (!regexpStack_) {
    200    return false;
    201  }
    202  return true;
    203 }
    204 
    205 Isolate::~Isolate() {
    206  if (regexpStack_) {
    207    js_delete(regexpStack_);
    208  }
    209 }
    210 
    211 /* static */
    212 const void* ExternalReference::TopOfRegexpStack(Isolate* isolate) {
    213  return reinterpret_cast<const void*>(
    214      isolate->regexp_stack()->memory_top_address_address());
    215 }
    216 
    217 /* static */
    218 size_t ExternalReference::SizeOfExcludingThis(
    219    mozilla::MallocSizeOf mallocSizeOf, RegExpStack* regexpStack) {
    220  if (regexpStack->thread_local_.owns_memory_) {
    221    return mallocSizeOf(regexpStack->thread_local_.memory_);
    222  }
    223  return 0;
    224 }
    225 
    226 Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) {
    227  MOZ_RELEASE_ASSERT(length >= 0);
    228 
    229  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    230 
    231  size_t alloc_size = sizeof(ByteArrayData) + length;
    232  ByteArrayData* data =
    233      static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size));
    234  if (!data) {
    235    oomUnsafe.crash("Irregexp NewByteArray");
    236  }
    237  new (data) ByteArrayData(length);
    238 
    239  return Handle<ByteArray>(JS::PrivateValue(data), this);
    240 }
    241 
    242 Handle<TrustedByteArray> Isolate::NewTrustedByteArray(int length,
    243                                                      AllocationType alloc) {
    244  MOZ_RELEASE_ASSERT(length >= 0);
    245 
    246  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    247 
    248  size_t alloc_size = sizeof(ByteArrayData) + length;
    249  ByteArrayData* data =
    250      static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size));
    251  if (!data) {
    252    oomUnsafe.crash("Irregexp NewTrustedByteArray");
    253  }
    254  new (data) ByteArrayData(length);
    255 
    256  return Handle<TrustedByteArray>(JS::PrivateValue(data), this);
    257 }
    258 
    259 Handle<FixedArray> Isolate::NewFixedArray(int length) {
    260  MOZ_RELEASE_ASSERT(length >= 0);
    261  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    262  js::ArrayObject* array = js::NewDenseFullyAllocatedArray(cx(), length);
    263  if (!array) {
    264    oomUnsafe.crash("Irregexp NewFixedArray");
    265  }
    266  array->ensureDenseInitializedLength(0, length);
    267  return Handle<FixedArray>(JS::ObjectValue(*array), this);
    268 }
    269 
    270 template <typename T>
    271 Handle<FixedIntegerArray<T>> Isolate::NewFixedIntegerArray(uint32_t length) {
    272  MOZ_RELEASE_ASSERT(length < std::numeric_limits<uint32_t>::max() / sizeof(T));
    273  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    274 
    275  uint32_t rawLength = length * sizeof(T);
    276  size_t allocSize = sizeof(ByteArrayData) + rawLength;
    277  ByteArrayData* data =
    278      static_cast<ByteArrayData*>(allocatePseudoHandle(allocSize));
    279  if (!data) {
    280    oomUnsafe.crash("Irregexp NewFixedIntegerArray");
    281  }
    282  new (data) ByteArrayData(rawLength);
    283 
    284  return Handle<FixedIntegerArray<T>>(JS::PrivateValue(data), this);
    285 }
    286 
    287 template <typename T>
    288 Handle<FixedIntegerArray<T>> FixedIntegerArray<T>::New(Isolate* isolate,
    289                                                       uint32_t length) {
    290  return isolate->NewFixedIntegerArray<T>(length);
    291 }
    292 
    293 template class FixedIntegerArray<uint16_t>;
    294 
    295 template <typename CharT>
    296 Handle<String> Isolate::InternalizeString(
    297    const base::Vector<const CharT>& str) {
    298  js::AutoEnterOOMUnsafeRegion oomUnsafe;
    299  JSAtom* atom = js::AtomizeChars(cx(), str.begin(), str.length());
    300  if (!atom) {
    301    oomUnsafe.crash("Irregexp InternalizeString");
    302  }
    303  return Handle<String>(JS::StringValue(atom), this);
    304 }
    305 
    306 template Handle<String> Isolate::InternalizeString(
    307    const base::Vector<const uint8_t>& str);
    308 template Handle<String> Isolate::InternalizeString(
    309    const base::Vector<const char16_t>& str);
    310 
    311 static_assert(JSRegExp::RegistersForCaptureCount(JSRegExp::kMaxCaptures) <=
    312              RegExpMacroAssembler::kMaxRegisterCount);
    313 
    314 // This function implements AdvanceStringIndex and CodePointAt:
    315 //  - https://tc39.es/ecma262/#sec-advancestringindex
    316 //  - https://tc39.es/ecma262/#sec-codepointat
    317 // The semantics are to advance 2 code units for properly paired
    318 // surrogates in unicode mode, and 1 code unit otherwise
    319 // (non-surrogates, unpaired surrogates, or non-unicode mode).
    320 uint64_t RegExpUtils::AdvanceStringIndex(Tagged<String> wrappedString,
    321                                         uint64_t index, bool unicode) {
    322  MOZ_ASSERT(index < kMaxSafeIntegerUint64);
    323  MOZ_ASSERT(wrappedString->IsFlat());
    324  JSLinearString* string = &wrappedString->str()->asLinear();
    325 
    326  if (unicode && index < string->length()) {
    327    char16_t first = string->latin1OrTwoByteChar(index);
    328    if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string->length()) {
    329      char16_t second = string->latin1OrTwoByteChar(index + 1);
    330      if (second >= 0xDC00 && second <= 0xDFFF) {
    331        return index + 2;
    332      }
    333    }
    334  }
    335 
    336  return index + 1;
    337 }
    338 
    339 // RegexpMacroAssemblerTracer::GetCode dumps the flags by first converting to
    340 // a String, then into a C string. To avoid allocating while assembling,
    341 // we just return a handle to the well-known atom "flags".
    342 Handle<String> JSRegExp::StringFromFlags(Isolate* isolate, RegExpFlags flags) {
    343  return Handle<String>(String(isolate->cx()->names().flags), isolate);
    344 }
    345 
    346 }  // namespace internal
    347 }  // namespace v8