RegExpShim.cpp (10951B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // Copyright 2019 the V8 project authors. All rights reserved. 8 // Use of this source code is governed by a BSD-style license that can be 9 // found in the LICENSE file. 10 11 #include "irregexp/RegExpShim.h" 12 13 #include "mozilla/MemoryReporting.h" 14 15 #include <iostream> 16 17 #include "irregexp/imported/regexp-macro-assembler.h" 18 #include "irregexp/imported/regexp-stack.h" 19 20 #include "vm/NativeObject-inl.h" 21 22 namespace v8 { 23 namespace internal { 24 25 void PrintF(const char* format, ...) { 26 va_list arguments; 27 va_start(arguments, format); 28 vprintf(format, arguments); 29 va_end(arguments); 30 } 31 32 void PrintF(FILE* out, const char* format, ...) { 33 va_list arguments; 34 va_start(arguments, format); 35 vfprintf(out, format, arguments); 36 va_end(arguments); 37 } 38 39 StdoutStream::operator std::ostream&() const { return std::cerr; } 40 41 template <typename T> 42 std::ostream& StdoutStream::operator<<(T t) { 43 return std::cerr << t; 44 } 45 46 template std::ostream& StdoutStream::operator<<(char const* c); 47 48 // Origin: 49 // https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169 50 // (This is a hand-simplified version.) 51 // Writes the given character to the output escaping everything outside 52 // of printable ASCII range. 53 std::ostream& operator<<(std::ostream& os, const AsUC16& c) { 54 base::uc16 v = c.value; 55 bool isPrint = 0x20 < v && v <= 0x7e; 56 char buf[10]; 57 const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x"; 58 SprintfLiteral(buf, format, v); 59 return os << buf; 60 } 61 std::ostream& operator<<(std::ostream& os, const AsUC32& c) { 62 int32_t v = c.value; 63 if (v <= String::kMaxUtf16CodeUnit) { 64 return os << AsUC16(v); 65 } 66 char buf[13]; 67 SprintfLiteral(buf, "\\u{%06x}", v); 68 return os << buf; 69 } 70 71 HandleScope::HandleScope(Isolate* isolate) : isolate_(isolate) { 72 isolate->openHandleScope(*this); 73 } 74 75 HandleScope::~HandleScope() { 76 isolate_->closeHandleScope(level_, non_gc_level_); 77 } 78 79 template <typename T> 80 Handle<T>::Handle(T object, Isolate* isolate) 81 : location_(isolate->getHandleLocation(object.value())) {} 82 83 template Handle<ByteArray>::Handle(ByteArray b, Isolate* isolate); 84 template Handle<TrustedByteArray>::Handle(TrustedByteArray b, Isolate* isolate); 85 template Handle<HeapObject>::Handle(const JS::Value& v, Isolate* isolate); 86 template Handle<IrRegExpData>::Handle(IrRegExpData re, Isolate* isolate); 87 template Handle<String>::Handle(String s, Isolate* isolate); 88 89 template <typename T> 90 Handle<T>::Handle(const JS::Value& value, Isolate* isolate) 91 : location_(isolate->getHandleLocation(value)) { 92 T::cast(Object(value)); // Assert that value has the correct type. 93 } 94 95 JS::Value* Isolate::getHandleLocation(const JS::Value& value) { 96 js::AutoEnterOOMUnsafeRegion oomUnsafe; 97 if (!handleArena_.Append(value)) { 98 oomUnsafe.crash("Irregexp handle allocation"); 99 } 100 return &handleArena_.GetLast(); 101 } 102 103 void* Isolate::allocatePseudoHandle(size_t bytes) { 104 PseudoHandle<void> ptr; 105 ptr.reset(js_malloc(bytes)); 106 if (!ptr) { 107 return nullptr; 108 } 109 if (!uniquePtrArena_.Append(std::move(ptr))) { 110 return nullptr; 111 } 112 return uniquePtrArena_.GetLast().get(); 113 } 114 115 template <typename T> 116 PseudoHandle<T> Isolate::takeOwnership(void* ptr) { 117 PseudoHandle<T> result = maybeTakeOwnership<T>(ptr); 118 MOZ_ASSERT(result); 119 return result; 120 } 121 122 template <typename T> 123 PseudoHandle<T> Isolate::maybeTakeOwnership(void* ptr) { 124 for (auto iter = uniquePtrArena_.IterFromLast(); !iter.Done(); iter.Prev()) { 125 auto& entry = iter.Get(); 126 if (entry.get() == ptr) { 127 PseudoHandle<T> result; 128 result.reset(static_cast<T*>(entry.release())); 129 return result; 130 } 131 } 132 return PseudoHandle<T>(); 133 } 134 135 PseudoHandle<ByteArrayData> ByteArray::maybeTakeOwnership(Isolate* isolate) { 136 PseudoHandle<ByteArrayData> result = 137 isolate->maybeTakeOwnership<ByteArrayData>(value().toPrivate()); 138 setValue(JS::PrivateValue(nullptr)); 139 return result; 140 } 141 142 PseudoHandle<ByteArrayData> ByteArray::takeOwnership(Isolate* isolate) { 143 PseudoHandle<ByteArrayData> result = maybeTakeOwnership(isolate); 144 MOZ_ASSERT(result); 145 return result; 146 } 147 148 void Isolate::trace(JSTracer* trc) { 149 js::gc::AssertRootMarkingPhase(trc); 150 151 for (auto iter = handleArena_.Iter(); !iter.Done(); iter.Next()) { 152 auto& elem = iter.Get(); 153 JS::GCPolicy<JS::Value>::trace(trc, &elem, "Isolate handle arena"); 154 } 155 } 156 157 size_t Isolate::sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const { 158 size_t size = mallocSizeOf(this); 159 160 size += mallocSizeOf(regexpStack_); 161 size += ExternalReference::SizeOfExcludingThis(mallocSizeOf, regexpStack_); 162 163 size += handleArena_.SizeOfExcludingThis(mallocSizeOf); 164 size += uniquePtrArena_.SizeOfExcludingThis(mallocSizeOf); 165 return size; 166 } 167 168 /*static*/ Handle<String> String::Flatten(Isolate* isolate, 169 Handle<String> string) { 170 if (string->IsFlat()) { 171 return string; 172 } 173 js::AutoEnterOOMUnsafeRegion oomUnsafe; 174 JSLinearString* linear = string->str()->ensureLinear(isolate->cx()); 175 if (!linear) { 176 oomUnsafe.crash("Irregexp String::Flatten"); 177 } 178 return Handle<String>(JS::StringValue(linear), isolate); 179 } 180 181 // This is only used for trace messages printing the source pattern of 182 // a regular expression. We have to return a unique_ptr, but we don't 183 // care about the contents, so we return an empty null-terminated string. 184 std::unique_ptr<char[]> String::ToCString() { 185 js::AutoEnterOOMUnsafeRegion oomUnsafe; 186 187 std::unique_ptr<char[]> ptr; 188 ptr.reset(static_cast<char*>(js_malloc(1))); 189 if (!ptr) { 190 oomUnsafe.crash("Irregexp String::ToCString"); 191 } 192 ptr[0] = '\0'; 193 194 return ptr; 195 } 196 197 bool Isolate::init() { 198 regexpStack_ = js_new<RegExpStack>(); 199 if (!regexpStack_) { 200 return false; 201 } 202 return true; 203 } 204 205 Isolate::~Isolate() { 206 if (regexpStack_) { 207 js_delete(regexpStack_); 208 } 209 } 210 211 /* static */ 212 const void* ExternalReference::TopOfRegexpStack(Isolate* isolate) { 213 return reinterpret_cast<const void*>( 214 isolate->regexp_stack()->memory_top_address_address()); 215 } 216 217 /* static */ 218 size_t ExternalReference::SizeOfExcludingThis( 219 mozilla::MallocSizeOf mallocSizeOf, RegExpStack* regexpStack) { 220 if (regexpStack->thread_local_.owns_memory_) { 221 return mallocSizeOf(regexpStack->thread_local_.memory_); 222 } 223 return 0; 224 } 225 226 Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) { 227 MOZ_RELEASE_ASSERT(length >= 0); 228 229 js::AutoEnterOOMUnsafeRegion oomUnsafe; 230 231 size_t alloc_size = sizeof(ByteArrayData) + length; 232 ByteArrayData* data = 233 static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size)); 234 if (!data) { 235 oomUnsafe.crash("Irregexp NewByteArray"); 236 } 237 new (data) ByteArrayData(length); 238 239 return Handle<ByteArray>(JS::PrivateValue(data), this); 240 } 241 242 Handle<TrustedByteArray> Isolate::NewTrustedByteArray(int length, 243 AllocationType alloc) { 244 MOZ_RELEASE_ASSERT(length >= 0); 245 246 js::AutoEnterOOMUnsafeRegion oomUnsafe; 247 248 size_t alloc_size = sizeof(ByteArrayData) + length; 249 ByteArrayData* data = 250 static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size)); 251 if (!data) { 252 oomUnsafe.crash("Irregexp NewTrustedByteArray"); 253 } 254 new (data) ByteArrayData(length); 255 256 return Handle<TrustedByteArray>(JS::PrivateValue(data), this); 257 } 258 259 Handle<FixedArray> Isolate::NewFixedArray(int length) { 260 MOZ_RELEASE_ASSERT(length >= 0); 261 js::AutoEnterOOMUnsafeRegion oomUnsafe; 262 js::ArrayObject* array = js::NewDenseFullyAllocatedArray(cx(), length); 263 if (!array) { 264 oomUnsafe.crash("Irregexp NewFixedArray"); 265 } 266 array->ensureDenseInitializedLength(0, length); 267 return Handle<FixedArray>(JS::ObjectValue(*array), this); 268 } 269 270 template <typename T> 271 Handle<FixedIntegerArray<T>> Isolate::NewFixedIntegerArray(uint32_t length) { 272 MOZ_RELEASE_ASSERT(length < std::numeric_limits<uint32_t>::max() / sizeof(T)); 273 js::AutoEnterOOMUnsafeRegion oomUnsafe; 274 275 uint32_t rawLength = length * sizeof(T); 276 size_t allocSize = sizeof(ByteArrayData) + rawLength; 277 ByteArrayData* data = 278 static_cast<ByteArrayData*>(allocatePseudoHandle(allocSize)); 279 if (!data) { 280 oomUnsafe.crash("Irregexp NewFixedIntegerArray"); 281 } 282 new (data) ByteArrayData(rawLength); 283 284 return Handle<FixedIntegerArray<T>>(JS::PrivateValue(data), this); 285 } 286 287 template <typename T> 288 Handle<FixedIntegerArray<T>> FixedIntegerArray<T>::New(Isolate* isolate, 289 uint32_t length) { 290 return isolate->NewFixedIntegerArray<T>(length); 291 } 292 293 template class FixedIntegerArray<uint16_t>; 294 295 template <typename CharT> 296 Handle<String> Isolate::InternalizeString( 297 const base::Vector<const CharT>& str) { 298 js::AutoEnterOOMUnsafeRegion oomUnsafe; 299 JSAtom* atom = js::AtomizeChars(cx(), str.begin(), str.length()); 300 if (!atom) { 301 oomUnsafe.crash("Irregexp InternalizeString"); 302 } 303 return Handle<String>(JS::StringValue(atom), this); 304 } 305 306 template Handle<String> Isolate::InternalizeString( 307 const base::Vector<const uint8_t>& str); 308 template Handle<String> Isolate::InternalizeString( 309 const base::Vector<const char16_t>& str); 310 311 static_assert(JSRegExp::RegistersForCaptureCount(JSRegExp::kMaxCaptures) <= 312 RegExpMacroAssembler::kMaxRegisterCount); 313 314 // This function implements AdvanceStringIndex and CodePointAt: 315 // - https://tc39.es/ecma262/#sec-advancestringindex 316 // - https://tc39.es/ecma262/#sec-codepointat 317 // The semantics are to advance 2 code units for properly paired 318 // surrogates in unicode mode, and 1 code unit otherwise 319 // (non-surrogates, unpaired surrogates, or non-unicode mode). 320 uint64_t RegExpUtils::AdvanceStringIndex(Tagged<String> wrappedString, 321 uint64_t index, bool unicode) { 322 MOZ_ASSERT(index < kMaxSafeIntegerUint64); 323 MOZ_ASSERT(wrappedString->IsFlat()); 324 JSLinearString* string = &wrappedString->str()->asLinear(); 325 326 if (unicode && index < string->length()) { 327 char16_t first = string->latin1OrTwoByteChar(index); 328 if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string->length()) { 329 char16_t second = string->latin1OrTwoByteChar(index + 1); 330 if (second >= 0xDC00 && second <= 0xDFFF) { 331 return index + 2; 332 } 333 } 334 } 335 336 return index + 1; 337 } 338 339 // RegexpMacroAssemblerTracer::GetCode dumps the flags by first converting to 340 // a String, then into a C string. To avoid allocating while assembling, 341 // we just return a handle to the well-known atom "flags". 342 Handle<String> JSRegExp::StringFromFlags(Isolate* isolate, RegExpFlags flags) { 343 return Handle<String>(String(isolate->cx()->names().flags), isolate); 344 } 345 346 } // namespace internal 347 } // namespace v8