RegExpNativeMacroAssembler.h (13409B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // Copyright 2020 the V8 project authors. All rights reserved. 8 // Use of this source code is governed by a BSD-style license that can be 9 // found in the LICENSE file. 10 11 // This file implements the NativeRegExpMacroAssembler interface for 12 // SpiderMonkey. It provides the same interface as each of V8's 13 // architecture-specific implementations. 14 15 #ifndef RegexpMacroAssemblerArch_h 16 #define RegexpMacroAssemblerArch_h 17 18 #include "irregexp/imported/regexp-macro-assembler.h" 19 #include "jit/MacroAssembler.h" 20 21 namespace v8 { 22 namespace internal { 23 24 struct FrameData { 25 // Character position at the start of the input, stored as a 26 // negative offset from the end of the string (input_end_pointer_). 27 size_t inputStart; 28 29 // The backtrack_stack_pointer_ register points to the top of the stack. 30 // This points to the bottom of the backtrack stack. 31 void* backtrackStackBase; 32 33 // Copy of the input MatchPairs. 34 int32_t* matches; // pointer to capture array 35 int32_t numMatches; // size of capture array 36 }; 37 38 class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler { 39 public: 40 SMRegExpMacroAssembler(JSContext* cx, js::jit::StackMacroAssembler& masm, 41 Zone* zone, Mode mode, uint32_t num_capture_registers); 42 virtual ~SMRegExpMacroAssembler() = default; 43 44 virtual int stack_limit_slack_slot_count(); 45 virtual IrregexpImplementation Implementation(); 46 47 virtual bool Succeed(); 48 virtual void Fail(); 49 50 virtual void AdvanceCurrentPosition(int by); 51 virtual void PopCurrentPosition(); 52 virtual void PushCurrentPosition(); 53 virtual void SetCurrentPositionFromEnd(int by); 54 55 virtual void Backtrack(); 56 virtual void Bind(Label* label); 57 virtual void GoTo(Label* label); 58 virtual void PushBacktrack(Label* label); 59 60 virtual void CheckCharacter(uint32_t c, Label* on_equal); 61 virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); 62 virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater); 63 virtual void CheckCharacterLT(base::uc16 limit, Label* on_less); 64 virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, 65 Label* on_equal); 66 virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, 67 Label* on_not_equal); 68 virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus, 69 base::uc16 mask, 70 Label* on_not_equal); 71 virtual void CheckFixedLengthLoop(Label* on_tos_equals_current_position); 72 virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to, 73 Label* on_in_range); 74 virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to, 75 Label* on_not_in_range); 76 virtual bool CheckCharacterInRangeArray( 77 const ZoneList<CharacterRange>* ranges, Label* on_in_range); 78 virtual bool CheckCharacterNotInRangeArray( 79 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range); 80 virtual void CheckAtStart(int cp_offset, Label* on_at_start); 81 virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); 82 virtual void CheckPosition(int cp_offset, Label* on_outside_input); 83 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set); 84 virtual void SkipUntilBitInTable(int cp_offset, Handle<ByteArray> table, 85 Handle<ByteArray> nibble_table, 86 int advance_by); 87 virtual bool SkipUntilBitInTableUseSimd(int advance_by); 88 virtual bool CheckSpecialCharacterClass(StandardCharacterSet type, 89 Label* on_no_match); 90 virtual void CheckNotBackReference(int start_reg, bool read_backward, 91 Label* on_no_match); 92 virtual void CheckNotBackReferenceIgnoreCase(int start_reg, 93 bool read_backward, bool unicode, 94 Label* on_no_match); 95 96 virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, 97 bool check_bounds, int characters, 98 int eats_at_least); 99 100 virtual void AdvanceRegister(int reg, int by); 101 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); 102 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); 103 virtual void IfRegisterEqPos(int reg, Label* if_eq); 104 virtual void PopRegister(int register_index); 105 virtual void PushRegister(int register_index, 106 StackCheckFlag check_stack_limit); 107 virtual void ReadCurrentPositionFromRegister(int reg); 108 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); 109 virtual void ReadStackPointerFromRegister(int reg); 110 virtual void WriteStackPointerToRegister(int reg); 111 virtual void SetRegister(int register_index, int to); 112 virtual void ClearRegisters(int reg_from, int reg_to); 113 114 virtual Handle<HeapObject> GetCode(Handle<String> source, RegExpFlags flags); 115 116 virtual bool CanReadUnaligned() const; 117 118 private: 119 size_t frameSize_ = 0; 120 121 void createStackFrame(); 122 void initFrameAndRegs(); 123 void successHandler(); 124 void exitHandler(); 125 void backtrackHandler(); 126 void stackOverflowHandler(); 127 128 // Push a register on the backtrack stack. 129 void Push(js::jit::Register value); 130 131 // Pop a value from the backtrack stack. 132 void Pop(js::jit::Register target); 133 134 void CheckAtStartImpl(int cp_offset, Label* on_cond, 135 js::jit::Assembler::Condition cond); 136 void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond, 137 js::jit::Assembler::Condition cond); 138 void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond, 139 bool negate); 140 void CheckCharacterInRangeImpl(base::uc16 from, base::uc16 to, Label* on_cond, 141 js::jit::Assembler::Condition cond); 142 void CheckNotBackReferenceImpl(int start_reg, bool read_backward, 143 bool unicode, Label* on_no_match, 144 bool ignore_case); 145 void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges); 146 147 void LoadCurrentCharacterUnchecked(int cp_offset, int characters); 148 149 void JumpOrBacktrack(Label* to); 150 151 // MacroAssembler methods that take a Label can be called with a 152 // null label, which means that we should backtrack if we would jump 153 // to that label. This is a helper to avoid writing out the same 154 // logic a dozen times. 155 inline js::jit::Label* LabelOrBacktrack(Label* to) { 156 return to ? to->inner() : &backtrack_label_; 157 } 158 159 void CheckBacktrackStackLimit(); 160 161 public: 162 static bool GrowBacktrackStack(RegExpStack* regexp_stack); 163 164 static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1, 165 const char16_t* substring2, 166 size_t byteLength); 167 static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1, 168 const char16_t* substring2, 169 size_t byteLength); 170 static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges); 171 172 private: 173 inline int char_size() { return static_cast<int>(mode_); } 174 inline js::jit::Scale factor() { 175 return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne; 176 } 177 178 js::jit::Address inputStart() { 179 return js::jit::Address(masm_.getStackPointer(), 180 offsetof(FrameData, inputStart)); 181 } 182 js::jit::Address backtrackStackBase() { 183 return js::jit::Address(masm_.getStackPointer(), 184 offsetof(FrameData, backtrackStackBase)); 185 } 186 js::jit::Address matches() { 187 return js::jit::Address(masm_.getStackPointer(), 188 offsetof(FrameData, matches)); 189 } 190 js::jit::Address numMatches() { 191 return js::jit::Address(masm_.getStackPointer(), 192 offsetof(FrameData, numMatches)); 193 } 194 195 // The stack-pointer-relative location of a regexp register. 196 js::jit::Address register_location(int register_index) { 197 return js::jit::Address(masm_.getStackPointer(), 198 register_offset(register_index)); 199 } 200 201 int32_t register_offset(int register_index) { 202 MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister); 203 if (num_registers_ <= register_index) { 204 num_registers_ = register_index + 1; 205 } 206 static_assert(alignof(uintptr_t) <= alignof(FrameData)); 207 return sizeof(FrameData) + register_index * sizeof(uintptr_t*); 208 } 209 210 JSContext* cx_; 211 js::jit::StackMacroAssembler& masm_; 212 213 /* 214 * This assembler uses the following registers: 215 * 216 * - current_character_: 217 * Contains the character (or characters) currently being examined. 218 * Must be loaded using LoadCurrentCharacter before using any of the 219 * dispatch methods. After a matching pass for a global regexp, 220 * temporarily stores the index of capture start. 221 * - current_position_: 222 * Current position in input *as negative byte offset from end of string*. 223 * - input_end_pointer_: 224 * Points to byte after last character in the input. current_position_ is 225 * relative to this. 226 * - backtrack_stack_pointer_: 227 * Points to tip of the (heap-allocated) backtrack stack. The stack grows 228 * downward (like the native stack). 229 * - temp0_, temp1_, temp2_: 230 * Scratch registers. 231 * 232 * The native stack pointer is used to access arguments (InputOutputData), 233 * local variables (FrameData), and irregexp's internal virtual registers 234 * (see register_location). 235 */ 236 237 js::jit::Register current_character_; 238 js::jit::Register current_position_; 239 js::jit::Register input_end_pointer_; 240 js::jit::Register backtrack_stack_pointer_; 241 js::jit::Register temp0_, temp1_, temp2_; 242 243 // These labels are used in various API calls and bound (if used) in 244 // GetCode. If we abort in the middle of a compilation, as may 245 // happen if a regexp is too big, they may be used but not 246 // bound. 247 js::jit::NonAssertingLabel entry_label_; 248 js::jit::NonAssertingLabel start_label_; 249 js::jit::NonAssertingLabel backtrack_label_; 250 js::jit::NonAssertingLabel success_label_; 251 js::jit::NonAssertingLabel exit_label_; 252 js::jit::NonAssertingLabel stack_overflow_label_; 253 js::jit::NonAssertingLabel exit_with_exception_label_; 254 255 // When we generate the code to push a backtrack label's address 256 // onto the backtrack stack, we don't know its final address. We 257 // have to patch it after linking. This is slightly delicate, as the 258 // Label itself (which is allocated on the stack) may not exist by 259 // the time we link. The approach is as follows: 260 // 261 // 1. When we push a label on the backtrack stack (PushBacktrack), 262 // we bind the label's patchOffset_ field to the offset within 263 // the code that should be overwritten. This works because each 264 // label is only pushed by a single instruction. 265 // 266 // 2. When we bind a label (Bind), we check to see if it has a 267 // bound patchOffset_. If it does, we create a LabelPatch mapping 268 // its patch offset to the offset of the label itself. 269 // 270 // 3. While linking the code, we walk the list of label patches 271 // and patch the code accordingly. 272 // 273 // 4. Finally, we patch in the code base address that is added to the 274 // label offset to calculate the actual address to jump to. 275 class LabelPatch { 276 public: 277 LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) 278 : patchOffset_(patchOffset), labelOffset_(labelOffset) {} 279 280 js::jit::CodeOffset patchOffset_; 281 size_t labelOffset_ = 0; 282 }; 283 284 js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_; 285 void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) { 286 js::AutoEnterOOMUnsafeRegion oomUnsafe; 287 if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) { 288 oomUnsafe.crash("Irregexp label patch"); 289 } 290 } 291 292 js::Vector<js::jit::CodeOffset, 4, js::SystemAllocPolicy> 293 backtrackCodeOffsetPatches_; 294 void PushBacktrackCodeOffsetPatch(js::jit::CodeOffset offset) { 295 js::AutoEnterOOMUnsafeRegion oomUnsafe; 296 if (!backtrackCodeOffsetPatches_.append(offset)) { 297 oomUnsafe.crash("Irregexp backtrack code offset patch"); 298 } 299 } 300 301 Mode mode_; 302 int num_registers_; 303 int num_capture_registers_; 304 js::jit::LiveGeneralRegisterSet savedRegisters_; 305 306 public: 307 using TableVector = 308 js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>; 309 TableVector& tables() { return tables_; } 310 311 private: 312 TableVector tables_; 313 void AddTable(PseudoHandle<ByteArrayData> table) { 314 js::AutoEnterOOMUnsafeRegion oomUnsafe; 315 if (!tables_.append(std::move(table))) { 316 oomUnsafe.crash("Irregexp table append"); 317 } 318 } 319 }; 320 321 } // namespace internal 322 } // namespace v8 323 324 #endif // RegexpMacroAssemblerArch_h