RegExpShared.h (15789B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /** 8 * The compiled representation of a RegExp, potentially shared among RegExp 9 * instances created during separate evaluations of a single RegExp literal in 10 * source code. 11 */ 12 13 #ifndef vm_RegExpShared_h 14 #define vm_RegExpShared_h 15 16 #include "mozilla/Assertions.h" 17 #include "mozilla/MemoryReporting.h" 18 19 #include "gc/Barrier.h" 20 #include "gc/Policy.h" 21 #include "gc/ZoneAllocator.h" 22 #include "irregexp/RegExpTypes.h" 23 #include "jit/JitCode.h" 24 #include "jit/JitOptions.h" 25 #include "js/AllocPolicy.h" 26 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags 27 #include "js/UbiNode.h" 28 #include "js/Vector.h" 29 #include "vm/ArrayObject.h" 30 31 namespace js { 32 33 class ArrayObject; 34 class PlainObject; 35 class RegExpRealm; 36 class RegExpShared; 37 class RegExpStatics; 38 class VectorMatchPairs; 39 40 using RootedRegExpShared = JS::Rooted<RegExpShared*>; 41 using HandleRegExpShared = JS::Handle<RegExpShared*>; 42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>; 43 44 enum class RegExpRunStatus : int32_t { 45 Error = -1, 46 Success = 1, 47 Success_NotFound = 0, 48 }; 49 50 inline bool IsNativeRegExpEnabled() { 51 return jit::HasJitBackend() && jit::JitOptions.nativeRegExp; 52 } 53 54 /* 55 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is 56 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may 57 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a 58 * table so that they can be reused when compiling the same regex string. 59 * 60 * To save memory, a RegExpShared is not created for a RegExpObject until it is 61 * needed for execution. When a RegExpShared needs to be created, it is looked 62 * up in a per-compartment table to allow reuse between objects. 63 * 64 * During a GC, RegExpShared instances are marked and swept like GC things. 65 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather 66 * than explicitly tracing them, so that the RegExpShared and any jitcode can 67 * be reclaimed quicker. However, the RegExpShareds are traced through by 68 * objects when we are preserving jitcode in their zone, to avoid the same 69 * recompilation inefficiencies as normal Ion and baseline compilation. 70 */ 71 class RegExpShared 72 : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> { 73 friend class js::gc::CellAllocator; 74 75 public: 76 enum class Kind : uint32_t { Unparsed, Atom, RegExp }; 77 enum class CodeKind { Bytecode, Jitcode, Any }; 78 79 using ByteCode = js::irregexp::ByteArrayData; 80 using JitCodeTable = js::irregexp::ByteArray; 81 using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>; 82 83 private: 84 friend class RegExpStatics; 85 friend class RegExpZone; 86 87 struct RegExpCompilation { 88 GCPtr<jit::JitCode*> jitCode; 89 ByteCode* byteCode = nullptr; 90 91 bool compiled(CodeKind kind = CodeKind::Any) const { 92 switch (kind) { 93 case CodeKind::Bytecode: 94 return !!byteCode; 95 case CodeKind::Jitcode: 96 return !!jitCode; 97 case CodeKind::Any: 98 return !!byteCode || !!jitCode; 99 } 100 MOZ_CRASH("Unreachable"); 101 } 102 103 size_t byteCodeLength() const { 104 MOZ_ASSERT(byteCode); 105 return byteCode->length(); 106 } 107 }; 108 109 public: 110 /* Source to the RegExp, for lazy compilation. Stored in the cell header. */ 111 JSAtom* getSource() const { return headerPtr(); } 112 113 private: 114 RegExpCompilation compilationArray[2]; 115 116 uint32_t pairCount_; 117 JS::RegExpFlags flags; 118 119 RegExpShared::Kind kind_ = Kind::Unparsed; 120 GCPtr<JSAtom*> patternAtom_; 121 uint32_t maxRegisters_ = 0; 122 uint32_t ticks_ = 0; 123 124 // With duplicate named capture groups, it's possible that the number of 125 // distinct named groups is less than the total number of named captures. 126 // If they are equal, we used the namedCaptureIndices_ array directly to 127 // return the capture index corresponding to a name. If they are not equal, 128 // we use the namedCapturedSliceIndices_ array to keep track of the offsets 129 // into the namedCaptureIndices_ array where indices corresponding to a new 130 // name start. 131 // E.g. if we have a regex like /((<?a>a)|(<?a>A))(<?b>b)/, the 132 // namedCapturedIndices_ array might look like [0, 1, 2], and the 133 // namedCaptureSliceIndices_ array like [0, 2]. Then we can construct the 134 // slice corresponding to `a` as offset 0, length 2, and the slice 135 // corresponding to `b` as offset 2, length 1. 136 uint32_t numNamedCaptures_ = {}; 137 uint32_t numDistinctNamedCaptures_ = {}; 138 uint32_t* namedCaptureIndices_ = {}; 139 uint32_t* namedCaptureSliceIndices_ = {}; 140 GCPtr<PlainObject*> groupsTemplate_ = {}; 141 142 static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; } 143 144 // Tables referenced by JIT code. 145 JitCodeTables tables; 146 147 /* Internal functions. */ 148 RegExpShared(JSAtom* source, JS::RegExpFlags flags); 149 150 const RegExpCompilation& compilation(bool latin1) const { 151 return compilationArray[CompilationIndex(latin1)]; 152 } 153 154 RegExpCompilation& compilation(bool latin1) { 155 return compilationArray[CompilationIndex(latin1)]; 156 } 157 158 public: 159 ~RegExpShared() = delete; 160 161 static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res, 162 Handle<JSLinearString*> input, CodeKind code); 163 164 static RegExpRunStatus executeAtom(MutableHandleRegExpShared re, 165 Handle<JSLinearString*> input, 166 size_t start, VectorMatchPairs* matches); 167 168 // Execute this RegExp on input starting from searchIndex, filling in matches. 169 static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res, 170 Handle<JSLinearString*> input, 171 size_t searchIndex, VectorMatchPairs* matches); 172 173 // Register a table with this RegExpShared, and take ownership. 174 bool addTable(JitCodeTable table) { return tables.append(std::move(table)); } 175 176 /* Accessors */ 177 178 size_t pairCount() const { 179 MOZ_ASSERT(kind() != Kind::Unparsed); 180 return pairCount_; 181 } 182 183 RegExpShared::Kind kind() const { return kind_; } 184 185 // Use simple string matching for this regexp. 186 void useAtomMatch(Handle<JSAtom*> pattern); 187 188 // Use the regular expression engine for this regexp. 189 void useRegExpMatch(size_t parenCount); 190 191 static void InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re, 192 uint32_t numNamedCaptures, 193 uint32_t numDistinctNamedCaptures, 194 Handle<PlainObject*> templateObject, 195 uint32_t* captureIndices, 196 uint32_t* captureSliceIndices); 197 PlainObject* getGroupsTemplate() { return groupsTemplate_; } 198 199 void tierUpTick(); 200 bool markedForTierUp() const; 201 202 void setByteCode(ByteCode* code, bool latin1) { 203 compilation(latin1).byteCode = code; 204 } 205 ByteCode* getByteCode(bool latin1) const { 206 return compilation(latin1).byteCode; 207 } 208 void setJitCode(jit::JitCode* code, bool latin1) { 209 compilation(latin1).jitCode = code; 210 } 211 jit::JitCode* getJitCode(bool latin1) const { 212 return compilation(latin1).jitCode; 213 } 214 uint32_t getMaxRegisters() const { return maxRegisters_; } 215 void updateMaxRegisters(uint32_t numRegisters) { 216 maxRegisters_ = std::max(maxRegisters_, numRegisters); 217 } 218 219 uint32_t numNamedCaptures() const { return numNamedCaptures_; } 220 uint32_t numDistinctNamedCaptures() const { 221 return numDistinctNamedCaptures_; 222 } 223 int32_t getNamedCaptureIndex(uint32_t idx) const { 224 MOZ_ASSERT(idx < numNamedCaptures()); 225 MOZ_ASSERT(namedCaptureIndices_); 226 MOZ_ASSERT(!namedCaptureSliceIndices_); 227 return namedCaptureIndices_[idx]; 228 } 229 230 mozilla::Span<uint32_t> getNamedCaptureIndices(uint32_t idx) const { 231 MOZ_ASSERT(idx < numDistinctNamedCaptures()); 232 MOZ_ASSERT(namedCaptureIndices_); 233 MOZ_ASSERT(namedCaptureSliceIndices_); 234 // The start of our slice is the value stored in the slice indices. 235 uint32_t* start = &namedCaptureIndices_[namedCaptureSliceIndices_[idx]]; 236 size_t length = 0; 237 if (idx + 1 < numDistinctNamedCaptures()) { 238 // If we're not the last slice index, the number of items is the 239 // difference between us and the next index. 240 length = 241 namedCaptureSliceIndices_[idx + 1] - namedCaptureSliceIndices_[idx]; 242 } else { 243 // Otherwise, it's the number of remaining capture indices. 244 length = numNamedCaptures() - namedCaptureSliceIndices_[idx]; 245 } 246 return mozilla::Span<uint32_t>(start, length); 247 } 248 249 JSAtom* patternAtom() const { return patternAtom_; } 250 251 JS::RegExpFlags getFlags() const { return flags; } 252 253 bool hasIndices() const { return flags.hasIndices(); } 254 bool global() const { return flags.global(); } 255 bool ignoreCase() const { return flags.ignoreCase(); } 256 bool multiline() const { return flags.multiline(); } 257 bool dotAll() const { return flags.dotAll(); } 258 bool unicode() const { return flags.unicode(); } 259 bool unicodeSets() const { return flags.unicodeSets(); } 260 bool sticky() const { return flags.sticky(); } 261 262 bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const { 263 return compilation(latin1).compiled(codeKind); 264 } 265 bool isCompiled() const { return isCompiled(true) || isCompiled(false); } 266 267 void traceChildren(JSTracer* trc); 268 void discardJitCode(); 269 void finalize(JS::GCContext* gcx); 270 271 static size_t offsetOfSource() { return offsetOfHeaderPtr(); } 272 273 static size_t offsetOfPatternAtom() { 274 return offsetof(RegExpShared, patternAtom_); 275 } 276 277 static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); } 278 279 static size_t offsetOfPairCount() { 280 return offsetof(RegExpShared, pairCount_); 281 } 282 283 static size_t offsetOfKind() { return offsetof(RegExpShared, kind_); } 284 285 static size_t offsetOfJitCode(bool latin1) { 286 return offsetof(RegExpShared, compilationArray) + 287 (CompilationIndex(latin1) * sizeof(RegExpCompilation)) + 288 offsetof(RegExpCompilation, jitCode); 289 } 290 291 static size_t offsetOfGroupsTemplate() { 292 return offsetof(RegExpShared, groupsTemplate_); 293 } 294 295 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); 296 297 public: 298 static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared; 299 }; 300 301 class RegExpZone { 302 struct Key { 303 JSAtom* atom = nullptr; 304 JS::RegExpFlags flags = JS::RegExpFlag::NoFlags; 305 306 Key() = default; 307 Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {} 308 MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared) 309 : atom(shared.unbarrieredGet()->getSource()), 310 flags(shared.unbarrieredGet()->getFlags()) {} 311 312 using Lookup = Key; 313 static HashNumber hash(const Lookup& l) { 314 HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom); 315 return mozilla::AddToHash(hash, l.flags.value()); 316 } 317 static bool match(Key l, Key r) { 318 return l.atom == r.atom && l.flags == r.flags; 319 } 320 }; 321 322 /* 323 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared 324 * that was not marked is deleted and removed from the set. 325 */ 326 using Set = JS::WeakCache< 327 JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>; 328 Set set_; 329 330 public: 331 explicit RegExpZone(Zone* zone); 332 333 ~RegExpZone() { MOZ_ASSERT(set_.empty()); } 334 335 bool empty() const { return set_.empty(); } 336 337 RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const { 338 Set::Ptr p = set_.lookup(Key(source, flags)); 339 return p ? *p : nullptr; 340 } 341 342 RegExpShared* get(JSContext* cx, Handle<JSAtom*> source, 343 JS::RegExpFlags flags); 344 345 #ifdef DEBUG 346 void clear() { set_.clear(); } 347 #endif 348 349 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const; 350 }; 351 352 class RegExpRealm { 353 public: 354 enum ResultShapeKind { Normal, WithIndices, Indices, NumKinds }; 355 356 // Information about the last regular expression match. This is used by the 357 // static RegExp properties such as RegExp.lastParen. 358 UniquePtr<RegExpStatics> regExpStatics; 359 360 private: 361 /* 362 * The shapes used for the result object of re.exec(), if there is a result. 363 * These are used in CreateRegExpMatchResult. There are three shapes, each of 364 * which is an ArrayObject shape with some additional properties. We decide 365 * which to use based on the |hasIndices| (/d) flag. 366 * 367 * Normal: Has |index|, |input|, and |groups| properties. 368 * Used for the result object if |hasIndices| is not set. 369 * 370 * WithIndices: Has |index|, |input|, |groups|, and |indices| properties. 371 * Used for the result object if |hasIndices| is set. 372 * 373 * Indices: Has a |groups| property. If |hasIndices| is set, used 374 * for the |.indices| property of the result object. 375 */ 376 GCPtr<SharedShape*> matchResultShapes_[ResultShapeKind::NumKinds]; 377 378 SharedShape* createMatchResultShape(JSContext* cx, ResultShapeKind kind); 379 380 public: 381 explicit RegExpRealm(); 382 383 void trace(JSTracer* trc); 384 385 static const size_t MatchResultObjectIndexSlot = 0; 386 static const size_t MatchResultObjectInputSlot = 1; 387 static const size_t MatchResultObjectGroupsSlot = 2; 388 static const size_t MatchResultObjectIndicesSlot = 3; 389 390 // Number of used and allocated dynamic slots for a Normal match result 391 // object. These values are checked in createMatchResultShape. 392 static const size_t MatchResultObjectSlotSpan = 3; 393 static const size_t MatchResultObjectNumDynamicSlots = 6; 394 395 static const size_t IndicesGroupsSlot = 0; 396 397 static size_t offsetOfMatchResultObjectIndexSlot() { 398 return sizeof(Value) * MatchResultObjectIndexSlot; 399 } 400 static size_t offsetOfMatchResultObjectInputSlot() { 401 return sizeof(Value) * MatchResultObjectInputSlot; 402 } 403 static size_t offsetOfMatchResultObjectGroupsSlot() { 404 return sizeof(Value) * MatchResultObjectGroupsSlot; 405 } 406 static size_t offsetOfMatchResultObjectIndicesSlot() { 407 return sizeof(Value) * MatchResultObjectIndicesSlot; 408 } 409 410 /* Get or create the shape used for the result of .exec(). */ 411 SharedShape* getOrCreateMatchResultShape( 412 JSContext* cx, ResultShapeKind kind = ResultShapeKind::Normal) { 413 if (matchResultShapes_[kind]) { 414 return matchResultShapes_[kind]; 415 } 416 return createMatchResultShape(cx, kind); 417 } 418 419 static constexpr size_t offsetOfRegExpStatics() { 420 return offsetof(RegExpRealm, regExpStatics); 421 } 422 static constexpr size_t offsetOfNormalMatchResultShape() { 423 static_assert(sizeof(GCPtr<SharedShape*>) == sizeof(uintptr_t)); 424 return offsetof(RegExpRealm, matchResultShapes_) + 425 ResultShapeKind::Normal * sizeof(uintptr_t); 426 } 427 }; 428 429 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, 430 const JSLinearString* input, size_t start, 431 MatchPairs* matchPairs); 432 433 } /* namespace js */ 434 435 namespace JS { 436 namespace ubi { 437 438 template <> 439 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> { 440 protected: 441 explicit Concrete(js::RegExpShared* ptr) 442 : TracerConcrete<js::RegExpShared>(ptr) {} 443 444 public: 445 static void construct(void* storage, js::RegExpShared* ptr) { 446 new (storage) Concrete(ptr); 447 } 448 449 CoarseType coarseType() const final { return CoarseType::Other; } 450 451 Size size(mozilla::MallocSizeOf mallocSizeOf) const override; 452 453 const char16_t* typeName() const override { return concreteTypeName; } 454 static const char16_t concreteTypeName[]; 455 }; 456 457 } // namespace ubi 458 } // namespace JS 459 460 #endif /* vm_RegExpShared_h */