// RegExpNativeMacroAssembler.cpp (54945B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // Copyright 2020 the V8 project authors. All rights reserved. 8 // Use of this source code is governed by a BSD-style license that can be 9 // found in the LICENSE file. 10 11 #include "irregexp/imported/regexp-macro-assembler-arch.h" 12 #include "irregexp/imported/regexp-stack.h" 13 #include "irregexp/imported/special-case.h" 14 #include "jit/Linker.h" 15 #include "jit/PerfSpewer.h" 16 #include "vm/MatchPairs.h" 17 #include "vm/Realm.h" 18 #ifdef MOZ_VTUNE 19 # include "vtune/VTuneWrapper.h" 20 #endif 21 22 #include "jit/ABIFunctionList-inl.h" 23 #include "jit/MacroAssembler-inl.h" 24 25 namespace v8 { 26 namespace internal { 27 28 using js::MatchPairs; 29 using js::jit::AbsoluteAddress; 30 using js::jit::Address; 31 using js::jit::AllocatableGeneralRegisterSet; 32 using js::jit::Assembler; 33 using js::jit::BaseIndex; 34 using js::jit::CodeLocationLabel; 35 using js::jit::GeneralRegisterBackwardIterator; 36 using js::jit::GeneralRegisterForwardIterator; 37 using js::jit::GeneralRegisterSet; 38 using js::jit::Imm32; 39 using js::jit::ImmPtr; 40 using js::jit::ImmWord; 41 using js::jit::JitCode; 42 using js::jit::Linker; 43 using js::jit::LiveGeneralRegisterSet; 44 using js::jit::Register; 45 using js::jit::Registers; 46 using js::jit::StackMacroAssembler; 47 48 SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx, 49 StackMacroAssembler& masm, 50 Zone* zone, Mode mode, 51 uint32_t num_capture_registers) 52 : NativeRegExpMacroAssembler(cx->isolate.ref(), zone), 53 cx_(cx), 54 masm_(masm), 55 mode_(mode), 56 num_registers_(num_capture_registers), 57 num_capture_registers_(num_capture_registers) { 58 // Each capture has a start and an 
end register 59 MOZ_ASSERT(num_capture_registers_ % 2 == 0); 60 61 AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); 62 63 input_end_pointer_ = regs.takeAny(); 64 current_character_ = regs.takeAny(); 65 current_position_ = regs.takeAny(); 66 backtrack_stack_pointer_ = regs.takeAny(); 67 temp0_ = regs.takeAny(); 68 temp1_ = regs.takeAny(); 69 if (!regs.empty()) { 70 // Not enough registers on x86. 71 temp2_ = regs.takeAny(); 72 } 73 savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs); 74 75 masm_.jump(&entry_label_); // We'll generate the entry code later 76 masm_.bind(&start_label_); // and continue from here. 77 } 78 79 int SMRegExpMacroAssembler::stack_limit_slack_slot_count() { 80 return RegExpStack::kStackLimitSlackSlotCount; 81 } 82 83 void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) { 84 if (by != 0) { 85 masm_.addPtr(Imm32(by * char_size()), current_position_); 86 } 87 } 88 89 void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) { 90 MOZ_ASSERT(reg >= 0 && reg < num_registers_); 91 if (by != 0) { 92 masm_.addPtr(Imm32(by), register_location(reg)); 93 } 94 } 95 96 void SMRegExpMacroAssembler::Backtrack() { 97 #ifdef DEBUG 98 js::jit::Label bailOut; 99 // Check for simulating interrupt 100 masm_.branch32(Assembler::NotEqual, 101 AbsoluteAddress(&cx_->isolate->shouldSimulateInterrupt_), 102 Imm32(0), &bailOut); 103 #endif 104 // Check for an interrupt. We have to restart from the beginning if we 105 // are interrupted, so we only check for urgent interrupts. 
106 js::jit::Label noInterrupt; 107 masm_.branchTest32( 108 Assembler::Zero, AbsoluteAddress(cx_->addressOfInterruptBits()), 109 Imm32(uint32_t(js::InterruptReason::CallbackUrgent)), &noInterrupt); 110 #ifdef DEBUG 111 // bailing out if we have simulating interrupt flag set 112 masm_.bind(&bailOut); 113 #endif 114 masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_); 115 masm_.jump(&exit_label_); 116 masm_.bind(&noInterrupt); 117 118 // Pop code offset from backtrack stack, add to code base address, and jump to 119 // location. 120 Pop(temp0_); 121 PushBacktrackCodeOffsetPatch(masm_.movWithPatch(ImmPtr(nullptr), temp1_)); 122 masm_.addPtr(temp1_, temp0_); 123 masm_.jump(temp0_); 124 } 125 126 void SMRegExpMacroAssembler::Bind(Label* label) { 127 masm_.bind(label->inner()); 128 if (label->patchOffset_.bound()) { 129 AddLabelPatch(label->patchOffset_, label->pos()); 130 } 131 } 132 133 // Check if current_position + cp_offset is the input start 134 void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond, 135 Assembler::Condition cond) { 136 Address addr(current_position_, cp_offset * char_size()); 137 masm_.computeEffectiveAddress(addr, temp0_); 138 139 masm_.branchPtr(cond, inputStart(), temp0_, LabelOrBacktrack(on_cond)); 140 } 141 142 void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) { 143 CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal); 144 } 145 146 void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset, 147 Label* on_not_at_start) { 148 CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual); 149 } 150 151 void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond, 152 Assembler::Condition cond) { 153 masm_.branch32(cond, current_character_, c, LabelOrBacktrack(on_cond)); 154 } 155 156 void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) { 157 CheckCharacterImpl(Imm32(c), on_equal, Assembler::Equal); 158 } 159 160 void 
SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c, 161 Label* on_not_equal) { 162 CheckCharacterImpl(Imm32(c), on_not_equal, Assembler::NotEqual); 163 } 164 165 void SMRegExpMacroAssembler::CheckCharacterGT(base::uc16 limit, 166 Label* on_greater) { 167 CheckCharacterImpl(Imm32(limit), on_greater, Assembler::GreaterThan); 168 } 169 170 void SMRegExpMacroAssembler::CheckCharacterLT(base::uc16 limit, 171 Label* on_less) { 172 CheckCharacterImpl(Imm32(limit), on_less, Assembler::LessThan); 173 } 174 175 // Bitwise-and the current character with mask and then check for a 176 // match with c. 177 void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c, 178 uint32_t mask, 179 Label* on_cond, 180 bool is_not) { 181 if (c == 0) { 182 Assembler::Condition cond = is_not ? Assembler::NonZero : Assembler::Zero; 183 masm_.branchTest32(cond, current_character_, Imm32(mask), 184 LabelOrBacktrack(on_cond)); 185 } else { 186 Assembler::Condition cond = is_not ? Assembler::NotEqual : Assembler::Equal; 187 masm_.move32(Imm32(mask), temp0_); 188 masm_.and32(current_character_, temp0_); 189 masm_.branch32(cond, temp0_, Imm32(c), LabelOrBacktrack(on_cond)); 190 } 191 } 192 193 void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, uint32_t mask, 194 Label* on_equal) { 195 CheckCharacterAfterAndImpl(c, mask, on_equal, /*is_not =*/false); 196 } 197 198 void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c, 199 uint32_t mask, 200 Label* on_not_equal) { 201 CheckCharacterAfterAndImpl(c, mask, on_not_equal, /*is_not =*/true); 202 } 203 204 // Subtract minus from the current character, then bitwise-and the 205 // result with mask, then check for a match with c. 
206 void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd( 207 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) { 208 masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_); 209 if (c == 0) { 210 masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask), 211 LabelOrBacktrack(on_not_equal)); 212 } else { 213 masm_.and32(Imm32(mask), temp0_); 214 masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c), 215 LabelOrBacktrack(on_not_equal)); 216 } 217 } 218 219 // If the current position matches the position stored on top of the backtrack 220 // stack, pops the backtrack stack and branches to the given label. 221 void SMRegExpMacroAssembler::CheckFixedLengthLoop(Label* on_equal) { 222 js::jit::Label fallthrough; 223 masm_.load32SignExtendToPtr(Address(backtrack_stack_pointer_, 0), temp0_); 224 masm_.branchPtr(Assembler::NotEqual, temp0_, current_position_, &fallthrough); 225 masm_.addPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_); // Pop. 226 JumpOrBacktrack(on_equal); 227 masm_.bind(&fallthrough); 228 } 229 230 void SMRegExpMacroAssembler::CheckCharacterInRangeImpl( 231 base::uc16 from, base::uc16 to, Label* on_cond, Assembler::Condition cond) { 232 // x is in [from,to] if unsigned(x - from) <= to - from 233 masm_.computeEffectiveAddress(Address(current_character_, -from), temp0_); 234 masm_.branch32(cond, temp0_, Imm32(to - from), LabelOrBacktrack(on_cond)); 235 } 236 237 void SMRegExpMacroAssembler::CheckCharacterInRange(base::uc16 from, 238 base::uc16 to, 239 Label* on_in_range) { 240 CheckCharacterInRangeImpl(from, to, on_in_range, Assembler::BelowOrEqual); 241 } 242 243 void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from, 244 base::uc16 to, 245 Label* on_not_in_range) { 246 CheckCharacterInRangeImpl(from, to, on_not_in_range, Assembler::Above); 247 } 248 249 /* static */ 250 bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c, 251 ByteArrayData* ranges) { 252 
js::AutoUnsafeCallWithABI unsafe; 253 MOZ_ASSERT(ranges->length() % sizeof(uint16_t) == 0); 254 uint32_t length = ranges->length() / sizeof(uint16_t); 255 MOZ_ASSERT(length > 0); 256 257 // Fast paths. 258 if (c < ranges->getTyped<uint16_t>(0)) { 259 // |c| is lower than the start of the first range. 260 // It is not in the range array. 261 return false; 262 } 263 if (c >= ranges->getTyped<uint16_t>(length - 1)) { 264 // |c| is higher than the last entry. If the table contains an odd 265 // number of entries, the last range is open-ended, so |c| is in 266 // the range array iff |length| is odd. 267 return (length % 2) != 0; 268 } 269 270 // |ranges| is stored as an interval list: an ordered list of 271 // starting points, where every even index marks the beginning of a 272 // range of characters that are included, and every odd index marks 273 // the beginning of a range of characters that are excluded. For 274 // example, the set [1,2,3,7,8,9] would be represented as the 275 // range array [1,4,7,10]. If |ranges| has an odd number of entries, 276 // the last included range is open-ended (so the set containing 277 // every character would be represented as [0]). 278 // 279 // Because of the symmetry between included and excluded ranges, we 280 // can do a binary search for the index in |ranges| with the value 281 // closest to but not exceeding |c|. If that index is even, |c| is 282 // in an included range. If that index is odd, |c| is in an excluded 283 // range. 284 uint32_t lower = 0; 285 uint32_t upper = length; 286 uint32_t mid = 0; 287 do { 288 mid = lower + (upper - lower) / 2; 289 const base::uc16 elem = ranges->getTyped<uint16_t>(mid); 290 if (c < elem) { 291 upper = mid; 292 } else if (c > elem) { 293 lower = mid + 1; 294 } else { 295 break; 296 } 297 } while (lower < upper); 298 uint32_t rangeIndex = c < ranges->getTyped<uint16_t>(mid) ? mid - 1 : mid; 299 300 // Included ranges start at even indices and end at odd indices. 
301 return rangeIndex % 2 == 0; 302 } 303 304 void SMRegExpMacroAssembler::CallIsCharacterInRangeArray( 305 const ZoneList<CharacterRange>* ranges) { 306 Handle<ByteArray> rangeArray = GetOrAddRangeArray(ranges); 307 masm_.movePtr(ImmPtr(rangeArray->inner()), temp0_); 308 309 // Save volatile regs. Temp regs don't need to be saved. 310 LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); 311 volatileRegs.takeUnchecked(temp0_); 312 volatileRegs.takeUnchecked(temp1_); 313 if (temp2_ != js::jit::InvalidReg) { 314 volatileRegs.takeUnchecked(temp2_); 315 } 316 masm_.PushRegsInMask(volatileRegs); 317 318 using Fn = bool (*)(uint32_t, ByteArrayData*); 319 masm_.setupUnalignedABICall(temp1_); 320 masm_.passABIArg(current_character_); 321 masm_.passABIArg(temp0_); 322 323 masm_.callWithABI<Fn, ::js::irregexp::IsCharacterInRangeArray>(); 324 masm_.storeCallBoolResult(temp1_); 325 masm_.PopRegsInMask(volatileRegs); 326 327 // GetOrAddRangeArray caches previously seen range arrays to reduce 328 // memory usage, so this may not be the first time we've seen this 329 // range array. We only need to transfer ownership from the 330 // HandleScope to the |tables_| vector once. 
331 PseudoHandle<ByteArrayData> rawRangeArray = 332 rangeArray->maybeTakeOwnership(isolate()); 333 if (rawRangeArray) { 334 AddTable(std::move(rawRangeArray)); 335 } 336 } 337 338 bool SMRegExpMacroAssembler::CheckCharacterInRangeArray( 339 const ZoneList<CharacterRange>* ranges, Label* on_in_range) { 340 CallIsCharacterInRangeArray(ranges); 341 masm_.branchTest32(Assembler::NonZero, temp1_, temp1_, 342 LabelOrBacktrack(on_in_range)); 343 return true; 344 } 345 346 bool SMRegExpMacroAssembler::CheckCharacterNotInRangeArray( 347 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) { 348 CallIsCharacterInRangeArray(ranges); 349 masm_.branchTest32(Assembler::Zero, temp1_, temp1_, 350 LabelOrBacktrack(on_not_in_range)); 351 return true; 352 } 353 354 void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table, 355 Label* on_bit_set) { 356 // Claim ownership of the ByteArray from the current HandleScope. 357 // ByteArrays are allocated on the C++ heap and are (eventually) 358 // owned by the RegExpShared. 359 PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate()); 360 361 masm_.movePtr(ImmPtr(rawTable->data()), temp0_); 362 363 masm_.move32(Imm32(kTableMask), temp1_); 364 masm_.and32(current_character_, temp1_); 365 366 masm_.load8ZeroExtend(BaseIndex(temp0_, temp1_, js::jit::TimesOne), temp0_); 367 masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, 368 LabelOrBacktrack(on_bit_set)); 369 370 // Transfer ownership of |rawTable| to the |tables_| vector. 371 AddTable(std::move(rawTable)); 372 } 373 374 void SMRegExpMacroAssembler::SkipUntilBitInTable(int cp_offset, 375 Handle<ByteArray> table, 376 Handle<ByteArray> nibble_table, 377 int advance_by) { 378 // Claim ownership of the ByteArray from the current HandleScope. 379 // ByteArrays are allocated on the C++ heap and are (eventually) 380 // owned by the RegExpShared. 
381 PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate()); 382 383 // TODO: SIMD support (bug 1928862). 384 MOZ_ASSERT(!SkipUntilBitInTableUseSimd(advance_by)); 385 386 // Scalar version. 387 Register tableReg = temp0_; 388 masm_.movePtr(ImmPtr(rawTable->data()), tableReg); 389 390 Label cont; 391 js::jit::Label scalarRepeat; 392 masm_.bind(&scalarRepeat); 393 CheckPosition(cp_offset, &cont); 394 LoadCurrentCharacterUnchecked(cp_offset, 1); 395 396 Register index = current_character_; 397 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { 398 index = temp1_; 399 masm_.and32(Imm32(kTableMask), current_character_, index); 400 } 401 402 masm_.load8ZeroExtend(BaseIndex(tableReg, index, js::jit::TimesOne), index); 403 masm_.branchTest32(Assembler::NonZero, index, index, cont.inner()); 404 AdvanceCurrentPosition(advance_by); 405 masm_.jump(&scalarRepeat); 406 407 masm_.bind(cont.inner()); 408 409 // Transfer ownership of |rawTable| to the |tables_| vector. 410 AddTable(std::move(rawTable)); 411 } 412 413 bool SMRegExpMacroAssembler::SkipUntilBitInTableUseSimd(int advance_by) { 414 // V8 found that using SIMD instead of the scalar version was only 415 // faster when we are advancing by 1 byte per iteration. 416 bool simdEnabled = false; 417 return simdEnabled && advance_by * char_size() == 1; 418 } 419 420 void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, 421 bool read_backward, 422 bool unicode, 423 Label* on_no_match, 424 bool ignore_case) { 425 js::jit::Label fallthrough; 426 427 // Captures are stored as a sequential pair of registers. 428 // Find the length of the back-referenced capture and load the 429 // capture's start index into current_character_. 
430 masm_.loadPtr(register_location(start_reg), // index of start 431 current_character_); 432 masm_.loadPtr(register_location(start_reg + 1), temp0_); // index of end 433 masm_.subPtr(current_character_, temp0_); // length of capture 434 435 // Capture registers are either both set or both cleared. 436 // If the capture length is zero, then the capture is either empty or cleared. 437 // Fall through in both cases. 438 masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough); 439 440 // Check that there are sufficient characters left in the input. 441 if (read_backward) { 442 // If start + len > current, there isn't enough room for a 443 // lookbehind backreference. 444 masm_.loadPtr(inputStart(), temp1_); 445 masm_.addPtr(temp0_, temp1_); 446 masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_, 447 LabelOrBacktrack(on_no_match)); 448 } else { 449 // current_position_ is the negative offset from the end. 450 // If current + len > 0, there isn't enough room for a backreference. 451 masm_.movePtr(current_position_, temp1_); 452 masm_.addPtr(temp0_, temp1_); 453 masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0), 454 LabelOrBacktrack(on_no_match)); 455 } 456 457 if (mode_ == UC16 && ignore_case) { 458 // We call a helper function for case-insensitive non-latin1 strings. 459 460 // Save volatile regs. temp1_, temp2_, and current_character_ 461 // don't need to be saved. current_position_ needs to be saved 462 // even if it's non-volatile, because we modify it to use as an argument. 463 LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); 464 volatileRegs.addUnchecked(current_position_); 465 volatileRegs.takeUnchecked(temp1_); 466 if (temp2_ != js::jit::InvalidReg) { 467 volatileRegs.takeUnchecked(temp2_); 468 } 469 volatileRegs.takeUnchecked(current_character_); 470 masm_.PushRegsInMask(volatileRegs); 471 472 // Parameters are 473 // Address captured - Address of captured substring's start. 
474 // Address current - Address of current character position. 475 // size_t byte_length - length of capture (in bytes) 476 477 // Compute |captured| 478 masm_.addPtr(input_end_pointer_, current_character_); 479 480 // Compute |current| 481 masm_.addPtr(input_end_pointer_, current_position_); 482 if (read_backward) { 483 // Offset by length when matching backwards. 484 masm_.subPtr(temp0_, current_position_); 485 } 486 487 using Fn = uint32_t (*)(const char16_t*, const char16_t*, size_t); 488 masm_.setupUnalignedABICall(temp1_); 489 masm_.passABIArg(current_character_); 490 masm_.passABIArg(current_position_); 491 masm_.passABIArg(temp0_); 492 493 if (unicode) { 494 masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareUnicode>(); 495 } else { 496 masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareNonUnicode>(); 497 } 498 masm_.storeCallInt32Result(temp1_); 499 masm_.PopRegsInMask(volatileRegs); 500 masm_.branchTest32(Assembler::Zero, temp1_, temp1_, 501 LabelOrBacktrack(on_no_match)); 502 503 // On success, advance position by length of capture 504 if (read_backward) { 505 masm_.subPtr(temp0_, current_position_); 506 } else { 507 masm_.addPtr(temp0_, current_position_); 508 } 509 510 masm_.bind(&fallthrough); 511 return; 512 } 513 514 // We will be modifying current_position_. Save it in case the match fails. 515 masm_.push(current_position_); 516 517 // Compute start of capture string 518 masm_.addPtr(input_end_pointer_, current_character_); 519 520 // Compute start of match string 521 masm_.addPtr(input_end_pointer_, current_position_); 522 if (read_backward) { 523 // Offset by length when matching backwards. 
524 masm_.subPtr(temp0_, current_position_); 525 } 526 527 // Compute end of match string 528 masm_.addPtr(current_position_, temp0_); 529 530 Register nextCaptureChar = temp1_; 531 Register nextMatchChar = temp2_; 532 533 if (temp2_ == js::jit::InvalidReg) { 534 masm_.push(backtrack_stack_pointer_); 535 nextMatchChar = backtrack_stack_pointer_; 536 } 537 538 js::jit::Label success; 539 js::jit::Label fail; 540 js::jit::Label loop; 541 masm_.bind(&loop); 542 543 // Load next character from each string. 544 if (mode_ == LATIN1) { 545 masm_.load8ZeroExtend(Address(current_character_, 0), nextCaptureChar); 546 masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar); 547 } else { 548 masm_.load16ZeroExtend(Address(current_character_, 0), nextCaptureChar); 549 masm_.load16ZeroExtend(Address(current_position_, 0), nextMatchChar); 550 } 551 552 if (ignore_case) { 553 MOZ_ASSERT(mode_ == LATIN1); 554 // Try exact match. 555 js::jit::Label loop_increment; 556 masm_.branch32(Assembler::Equal, nextCaptureChar, nextMatchChar, 557 &loop_increment); 558 559 // Mismatch. Try case-insensitive match. 560 // Force the capture character to lower case (by setting bit 0x20) 561 // then check to see if it is a letter. 562 js::jit::Label convert_match; 563 masm_.or32(Imm32(0x20), nextCaptureChar); 564 565 // Check if it is in [a,z]. 566 masm_.computeEffectiveAddress(Address(nextCaptureChar, -'a'), 567 nextMatchChar); 568 masm_.branch32(Assembler::BelowOrEqual, nextMatchChar, Imm32('z' - 'a'), 569 &convert_match); 570 // Check for values in range [224,254]. 571 // Exclude 247 (U+00F7 DIVISION SIGN). 572 masm_.sub32(Imm32(224 - 'a'), nextMatchChar); 573 masm_.branch32(Assembler::Above, nextMatchChar, Imm32(254 - 224), &fail); 574 masm_.branch32(Assembler::Equal, nextMatchChar, Imm32(247 - 224), &fail); 575 576 // Capture character is lower case. Convert match character 577 // to lower case and compare. 
578 masm_.bind(&convert_match); 579 masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar); 580 masm_.or32(Imm32(0x20), nextMatchChar); 581 masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail); 582 583 masm_.bind(&loop_increment); 584 } else { 585 // Fail if characters do not match. 586 masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail); 587 } 588 589 // Increment pointers into match and capture strings. 590 masm_.addPtr(Imm32(char_size()), current_character_); 591 masm_.addPtr(Imm32(char_size()), current_position_); 592 593 // Loop if we have not reached the end of the match string. 594 masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop); 595 masm_.jump(&success); 596 597 // If we fail, restore current_position_ and branch. 598 masm_.bind(&fail); 599 if (temp2_ == js::jit::InvalidReg) { 600 // Restore backtrack_stack_pointer_ when it was used as a temp register. 601 masm_.pop(backtrack_stack_pointer_); 602 } 603 masm_.pop(current_position_); 604 JumpOrBacktrack(on_no_match); 605 606 masm_.bind(&success); 607 608 if (temp2_ == js::jit::InvalidReg) { 609 // Restore backtrack_stack_pointer_ when it was used as a temp register. 610 masm_.pop(backtrack_stack_pointer_); 611 } 612 // Drop saved value of current_position_ 613 masm_.addToStackPtr(Imm32(sizeof(uintptr_t))); 614 615 // current_position_ is a pointer. Convert it back to an offset. 616 masm_.subPtr(input_end_pointer_, current_position_); 617 if (read_backward) { 618 // Subtract match length if we matched backward 619 masm_.addPtr(register_location(start_reg), current_position_); 620 masm_.subPtr(register_location(start_reg + 1), current_position_); 621 } 622 623 masm_.bind(&fallthrough); 624 } 625 626 // Branch if a back-reference does not match a previous capture. 
627 void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg, 628 bool read_backward, 629 Label* on_no_match) { 630 CheckNotBackReferenceImpl(start_reg, read_backward, /*unicode = */ false, 631 on_no_match, /*ignore_case = */ false); 632 } 633 634 void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase( 635 int start_reg, bool read_backward, bool unicode, Label* on_no_match) { 636 CheckNotBackReferenceImpl(start_reg, read_backward, unicode, on_no_match, 637 /*ignore_case = */ true); 638 } 639 640 // Checks whether the given offset from the current position is 641 // inside the input string. 642 void SMRegExpMacroAssembler::CheckPosition(int cp_offset, 643 Label* on_outside_input) { 644 // Note: current_position_ is a (negative) byte offset relative to 645 // the end of the input string. 646 if (cp_offset >= 0) { 647 // end + current + offset >= end 648 // <=> current + offset >= 0 649 // <=> current >= -offset 650 masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_, 651 ImmWord(-cp_offset * char_size()), 652 LabelOrBacktrack(on_outside_input)); 653 } else { 654 // Compute offset position 655 masm_.computeEffectiveAddress( 656 Address(current_position_, cp_offset * char_size()), temp0_); 657 658 // Compare to start of input. 659 masm_.branchPtr(Assembler::GreaterThan, inputStart(), temp0_, 660 LabelOrBacktrack(on_outside_input)); 661 } 662 } 663 664 // This function attempts to generate special case code for character classes. 665 // Returns true if a special case is generated. 666 // Otherwise returns false and generates no code. 667 bool SMRegExpMacroAssembler::CheckSpecialCharacterClass( 668 StandardCharacterSet type, Label* on_no_match) { 669 js::jit::Label* no_match = LabelOrBacktrack(on_no_match); 670 671 // Note: throughout this function, range checks (c in [min, max]) 672 // are implemented by an unsigned (c - min) <= (max - min) check. 
673 switch (type) { 674 case StandardCharacterSet::kWhitespace: { 675 // Match space-characters 676 if (mode_ != LATIN1) { 677 return false; 678 } 679 js::jit::Label success; 680 // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP). 681 682 // Check ' ' 683 masm_.branch32(Assembler::Equal, current_character_, Imm32(' '), 684 &success); 685 686 // Check '\t'..'\r' 687 masm_.computeEffectiveAddress(Address(current_character_, -'\t'), temp0_); 688 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'), 689 &success); 690 691 // Check \u00a0. 692 masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'), 693 no_match); 694 695 masm_.bind(&success); 696 return true; 697 } 698 case StandardCharacterSet::kNotWhitespace: 699 // The emitted code for generic character classes is good enough. 700 return false; 701 case StandardCharacterSet::kDigit: 702 // Match latin1 digits ('0'-'9') 703 masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_); 704 masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match); 705 return true; 706 case StandardCharacterSet::kNotDigit: 707 // Match anything except latin1 digits ('0'-'9') 708 masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_); 709 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'), 710 no_match); 711 return true; 712 case StandardCharacterSet::kNotLineTerminator: 713 // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d), 714 // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR. 715 // See https://tc39.es/ecma262/#prod-LineTerminator 716 717 // To test for 0x0a and 0x0d efficiently, we XOR the input with 1. 718 // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to 719 // test for the contiguous range 0x0b..0x0c. 
720 masm_.xor32(Imm32(0x01), current_character_, temp0_); 721 masm_.sub32(Imm32(0x0b), temp0_); 722 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b), 723 no_match); 724 725 if (mode_ == UC16) { 726 // Compare original value to 0x2028 and 0x2029, using the already 727 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 728 // 0x201d (0x2028 - 0x0b) or 0x201e. 729 masm_.sub32(Imm32(0x2028 - 0x0b), temp0_); 730 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028), 731 no_match); 732 } 733 return true; 734 case StandardCharacterSet::kWord: 735 // \w matches the set of 63 characters defined in Runtime Semantics: 736 // WordCharacters. We use a static lookup table, which is defined in 737 // regexp-macro-assembler.cc. 738 // Note: if both Unicode and IgnoreCase are true, \w matches a 739 // larger set of characters. That case is handled elsewhere. 740 if (mode_ != LATIN1) { 741 masm_.branch32(Assembler::Above, current_character_, Imm32('z'), 742 no_match); 743 } 744 static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar); 745 masm_.movePtr(ImmPtr(word_character_map), temp0_); 746 masm_.load8ZeroExtend( 747 BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_); 748 masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match); 749 return true; 750 case StandardCharacterSet::kNotWord: { 751 // See 'w' above. 
752 js::jit::Label done; 753 if (mode_ != LATIN1) { 754 masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done); 755 } 756 static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar); 757 masm_.movePtr(ImmPtr(word_character_map), temp0_); 758 masm_.load8ZeroExtend( 759 BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_); 760 masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match); 761 if (mode_ != LATIN1) { 762 masm_.bind(&done); 763 } 764 return true; 765 } 766 //////////////////////////////////////////////////////////////////////// 767 // Non-standard classes (with no syntactic shorthand) used internally // 768 //////////////////////////////////////////////////////////////////////// 769 case StandardCharacterSet::kEverything: 770 // Match any character 771 return true; 772 case StandardCharacterSet::kLineTerminator: 773 // Match newlines. The opposite of '.'. See '.' above. 774 masm_.xor32(Imm32(0x01), current_character_, temp0_); 775 masm_.sub32(Imm32(0x0b), temp0_); 776 if (mode_ == LATIN1) { 777 masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match); 778 } else { 779 MOZ_ASSERT(mode_ == UC16); 780 js::jit::Label done; 781 masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b), 782 &done); 783 784 // Compare original value to 0x2028 and 0x2029, using the already 785 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 786 // 0x201d (0x2028 - 0x0b) or 0x201e. 
      // NOTE(review): this is the tail of a character-class check that begins
      // before this chunk. The constants match U+2028/U+2029 (line/paragraph
      // separator); after the bias, Above means "not a line terminator".
      masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
      masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
                     no_match);
      masm_.bind(&done);
    }
    return true;
  }
  return false;
}

// Exit the generated code reporting that no match was found. The status
// value is passed to the exit handler in temp0_.
void SMRegExpMacroAssembler::Fail() {
  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success_NotFound)),
                temp0_);
  masm_.jump(&exit_label_);
}

// Unconditional jump. LabelOrBacktrack resolves the target; a null label
// stands for the backtrack handler (see JumpOrBacktrack below for the same
// convention).
void SMRegExpMacroAssembler::GoTo(Label* to) {
  masm_.jump(LabelOrBacktrack(to));
}

// Branch to |if_ge| if register |reg| >= |comparand|.
void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
                                          Label* if_ge) {
  masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg),
                  ImmWord(comparand), LabelOrBacktrack(if_ge));
}

// Branch to |if_lt| if register |reg| < |comparand|.
void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
                                          Label* if_lt) {
  masm_.branchPtr(Assembler::LessThan, register_location(reg),
                  ImmWord(comparand), LabelOrBacktrack(if_lt));
}

// Branch to |if_eq| if register |reg| holds the current input position.
void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
  masm_.branchPtr(Assembler::Equal, register_location(reg), current_position_,
                  LabelOrBacktrack(if_eq));
}

// This is a word-for-word identical copy of the V8 code, which is
// duplicated in at least nine different places in V8 (one per
// supported architecture) with no differences outside of comments and
// formatting. It should be hoisted into the superclass. Once that is
// done upstream, this version can be deleted.
void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
                                                      Label* on_end_of_input,
                                                      bool check_bounds,
                                                      int characters,
                                                      int eats_at_least) {
  // It's possible to preload a small number of characters when each success
  // path requires a large number of characters, but not the reverse.
  MOZ_ASSERT(eats_at_least >= characters);
  MOZ_ASSERT(cp_offset < (1 << 30));  // Be sane! (And ensure negation works)

  if (check_bounds) {
    if (cp_offset >= 0) {
      // Reading forward: check the furthest character we are guaranteed
      // to consume on any success path.
      CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
    } else {
      // Reading backward: check the (negative) offset itself.
      CheckPosition(cp_offset, on_end_of_input);
    }
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}

// Load the character (or characters) at the specified offset from the
// current position. Zero-extend to 32 bits.
void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
                                                           int characters) {
  // current_position_ is a (negative) byte offset from the end of the input,
  // so the address is input_end + position + cp_offset * char_size.
  BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne,
                    cp_offset * char_size());
  if (mode_ == LATIN1) {
    if (characters == 4) {
      masm_.load32(address, current_character_);
    } else if (characters == 2) {
      masm_.load16ZeroExtend(address, current_character_);
    } else {
      MOZ_ASSERT(characters == 1);
      masm_.load8ZeroExtend(address, current_character_);
    }
  } else {
    MOZ_ASSERT(mode_ == UC16);
    // In UC16 mode at most two 16-bit characters fit in the 32-bit
    // current_character_ register.
    if (characters == 2) {
      masm_.load32(address, current_character_);
    } else {
      MOZ_ASSERT(characters == 1);
      masm_.load16ZeroExtend(address, current_character_);
    }
  }
}

// Restore the current position from the backtrack stack.
void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_); }

// Pop a value from the backtrack stack into register |register_index|.
void SMRegExpMacroAssembler::PopRegister(int register_index) {
  Pop(temp0_);
  masm_.storePtr(temp0_, register_location(register_index));
}

// Push the code offset of |label| onto the backtrack stack. The offset is
// not known yet, so emit a patchable move and record the patch site on the
// label; it is resolved after linking (see the patch loops in GetCode).
void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
  MOZ_ASSERT(!label->is_bound());
  MOZ_ASSERT(!label->patchOffset_.bound());
  label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), temp0_);
  MOZ_ASSERT(label->patchOffset_.bound());

  Push(temp0_);

  CheckBacktrackStackLimit();
}

// Save the current position on the backtrack stack.
void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_); }

// Push the value of register |register_index| on the backtrack stack,
// optionally checking for backtrack-stack overflow.
void SMRegExpMacroAssembler::PushRegister(int register_index,
                                          StackCheckFlag check_stack_limit) {
  masm_.loadPtr(register_location(register_index), temp0_);
  Push(temp0_);
  if (check_stack_limit) {
    CheckBacktrackStackLimit();
  }
}

// Restore the current position from register |reg|.
void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
  masm_.loadPtr(register_location(reg), current_position_);
}

// Store the current position (plus cp_offset, in bytes) into register |reg|.
void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
                                                            int cp_offset) {
  if (cp_offset == 0) {
    masm_.storePtr(current_position_, register_location(reg));
  } else {
    // Use an effective-address computation to add the offset without
    // clobbering current_position_.
    Address addr(current_position_, cp_offset * char_size());
    masm_.computeEffectiveAddress(addr, temp0_);
    masm_.storePtr(temp0_, register_location(reg));
  }
}

// Note: The backtrack stack pointer is stored in a register as an
// offset from the stack top, not as a bare pointer, so that it is not
// corrupted if the backtrack stack grows (and therefore moves).
void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
  masm_.loadPtr(register_location(reg), backtrack_stack_pointer_);
  masm_.addPtr(backtrackStackBase(), backtrack_stack_pointer_);
}
void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
  masm_.movePtr(backtrack_stack_pointer_, temp0_);
  masm_.subPtr(backtrackStackBase(), temp0_);
  masm_.storePtr(temp0_, register_location(reg));
}

// When matching a regexp that is anchored at the end, this operation
// is used to try skipping the beginning of long strings. If the
// maximum length of a match is less than the length of the string, we
// can skip the initial len - max_len bytes.
void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
  js::jit::Label after_position;
  // Positions are negative byte offsets from the end of the input, so a
  // position >= -by * char_size is already within |by| characters of the end.
  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
                  ImmWord(-by * char_size()), &after_position);
  masm_.movePtr(ImmWord(-by * char_size()), current_position_);

  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  masm_.bind(&after_position);
}

// Set a scratch (non-capture) register to a constant value.
void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
  MOZ_ASSERT(register_index >= num_capture_registers_);
  masm_.storePtr(ImmWord(to), register_location(register_index));
}

// Returns true if a regexp match can be restarted (aka the regexp is global).
// The return value is not used anywhere, but we implement it to be safe.
bool SMRegExpMacroAssembler::Succeed() {
  masm_.jump(&success_label_);
  return global();
}

// Capture registers are initialized to input[-1]
void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
  MOZ_ASSERT(reg_from <= reg_to);
  masm_.loadPtr(inputStart(), temp0_);
  masm_.subPtr(Imm32(char_size()), temp0_);
  for (int reg = reg_from; reg <= reg_to; reg++) {
    masm_.storePtr(temp0_, register_location(reg));
  }
}

// Backtrack-stack entries are 32 bits wide even on 64-bit platforms, and the
// stack grows downward.
void SMRegExpMacroAssembler::Push(Register source) {
  MOZ_ASSERT(source != backtrack_stack_pointer_);

  masm_.subPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_);
  masm_.store32(source, Address(backtrack_stack_pointer_, 0));
}

void SMRegExpMacroAssembler::Pop(Register target) {
  MOZ_ASSERT(target != backtrack_stack_pointer_);

  // Sign-extend back to pointer width: entries may be negative positions.
  masm_.load32SignExtendToPtr(Address(backtrack_stack_pointer_, 0), target);
  masm_.addPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_);
}

// Jump to |to| if it is non-null; otherwise backtrack.
void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
  if (to) {
    masm_.jump(to->inner());
  } else {
    Backtrack();
  }
}

// Generate a quick inline test for backtrack stack overflow.
// If the test fails, call an OOL handler to try growing the stack.
void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
  js::jit::Label no_stack_overflow;
  // The backtrack stack grows downward, so we are still in bounds while the
  // limit address is below the stack pointer.
  masm_.branchPtr(
      Assembler::Below,
      AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()),
      backtrack_stack_pointer_, &no_stack_overflow);

  // Out-of-line path: try to grow the stack (see stackOverflowHandler).
  masm_.call(&stack_overflow_label_);

  // Exit with an exception if the call failed
  masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
                     &exit_with_exception_label_);

  masm_.bind(&no_stack_overflow);
}

// This is used to sneak an OOM through the V8 layer.
static Handle<HeapObject> DummyCode() {
  return Handle<HeapObject>::fromHandleValue(JS::UndefinedHandleValue);
}

// Finalize code. This is called last, so that we know how many
// registers we need.
Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source,
                                                   RegExpFlags flags) {
  if (!cx_->zone()->ensureJitZoneExists(cx_)) {
    return DummyCode();
  }

  masm_.bind(&entry_label_);

  // Emit the prologue, then jump over the out-of-line handlers to the
  // start of the regexp body.
  createStackFrame();
  initFrameAndRegs();

  masm_.jump(&start_label_);

  successHandler();
  exitHandler();
  backtrackHandler();
  stackOverflowHandler();

  Linker linker(masm_);
  JitCode* code = linker.newCode(cx_, js::jit::CodeKind::RegExp);
  if (!code) {
    return DummyCode();
  }

  // Fix up the patchable moves emitted by PushBacktrack: each now receives
  // the final code offset of its target label.
  for (LabelPatch& lp : labelPatches_) {
    Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, lp.patchOffset_),
                                       ImmPtr((void*)lp.labelOffset_),
                                       ImmPtr(nullptr));
  }

  // Patch in the base address of the generated code.
  for (js::jit::CodeOffset& offset : backtrackCodeOffsetPatches_) {
    Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, offset),
                                       ImmPtr(code->raw()), ImmPtr(nullptr));
  }

  CollectPerfSpewerJitCodeProfile(code, "RegExp");

#ifdef MOZ_VTUNE
  js::vtune::MarkStub(code, "RegExp");
#endif

  return Handle<HeapObject>(JS::PrivateGCThingValue(code), isolate());
}
/*
 * The stack will have the following structure:
 *  sp-> - FrameData
 *         - inputStart
 *         - backtrack stack base
 *         - matches
 *         - numMatches
 *       - Registers
 *         - Capture positions
 *         - Scratch registers
 *       --- frame alignment ---
 *       - Saved register area
 *  fp-> - Frame pointer
 *       - Return address
 */
void SMRegExpMacroAssembler::createStackFrame() {
#ifdef JS_CODEGEN_ARM64
  // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
  // addressing. The register we use for PSP may however also be used by
  // calling code, and it is nonvolatile, so save it. Do this as a special
  // case first because the generic save/restore code needs the PSP to be
  // initialized already.
  MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
  masm_.Str(js::jit::PseudoStackPointer64,
            vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));

  // Initialize the PSP from the SP.
  masm_.initPseudoStackPtr();
#endif

  masm_.Push(js::jit::FramePointer);
  masm_.moveStackPtrTo(js::jit::FramePointer);

  // Push non-volatile registers which might be modified by jitcode.
  for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more();
       ++iter) {
    masm_.Push(*iter);
  }

  // The pointer to InputOutputData is passed as the first argument.
  // On x86 we have to load it off the stack into temp0_.
  // On other platforms it is already in a register.
#ifdef JS_CODEGEN_X86
  // Skip the pushed frame pointer and the return address.
  Address ioDataAddr(js::jit::FramePointer, 2 * sizeof(void*));
  masm_.loadPtr(ioDataAddr, temp0_);
#else
  if (js::jit::IntArgReg0 != temp0_) {
    masm_.movePtr(js::jit::IntArgReg0, temp0_);
  }
#endif

  // Start a new stack frame.
  size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);
  frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
                                              masm_.framePushed(), frameBytes);
  masm_.reserveStack(frameSize_);
  masm_.checkStackAlignment();

  // Check if we have space on the stack. Use the *NoInterrupt stack limit to
  // avoid failing repeatedly when the regex code is called from Ion JIT code.
  // (See bug 1208819)
  js::jit::Label stack_ok;
  AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt());
  masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok);

  // There is not enough space on the stack. Exit with an exception.
  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
  masm_.jump(&exit_label_);

  masm_.bind(&stack_ok);
}

void SMRegExpMacroAssembler::initFrameAndRegs() {
  // At this point, an uninitialized stack frame has been created,
  // and the address of the InputOutputData is in temp0_.
  Register ioDataReg = temp0_;

  Register matchesReg = temp1_;
  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, matches)),
                matchesReg);

  // Initialize output registers
  // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
  // because we haven't yet written any data to |backtrack_stack_pointer_|.
  Register extraTemp = backtrack_stack_pointer_;

  masm_.loadPtr(Address(matchesReg, MatchPairs::offsetOfPairs()), extraTemp);
  masm_.storePtr(extraTemp, matches());
  masm_.load32(Address(matchesReg, MatchPairs::offsetOfPairCount()), extraTemp);
  masm_.store32(extraTemp, numMatches());

#ifdef DEBUG
  // Bounds-check numMatches. Note that callers that won't look at the captures
  // can always pass numMatches == 1.
  js::jit::Label enoughRegisters;
  masm_.branchPtr(Assembler::Equal, extraTemp, ImmWord(1), &enoughRegisters);
  masm_.branchPtr(Assembler::GreaterThanOrEqual, extraTemp,
                  ImmWord(num_capture_registers_ / 2), &enoughRegisters);
  masm_.assumeUnreachable("Not enough output pairs for RegExp");
  masm_.bind(&enoughRegisters);
#endif

  // Load input start pointer.
  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputStart)),
                current_position_);

  // Load input end pointer
  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputEnd)),
                input_end_pointer_);

  // Set up input position to be negative offset from string end.
  masm_.subPtr(input_end_pointer_, current_position_);

  // Store inputStart
  masm_.storePtr(current_position_, inputStart());

  // Load start index
  Register startIndexReg = temp1_;
  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, startIndex)),
                startIndexReg);
  // Advance the position by startIndex characters (factor() scales by
  // char_size).
  masm_.computeEffectiveAddress(
      BaseIndex(current_position_, startIndexReg, factor()), current_position_);

  // Initialize current_character_.
  // Load newline if index is at start, or previous character otherwise.
  js::jit::Label start_regexp;
  js::jit::Label load_previous_character;
  masm_.branchPtr(Assembler::NotEqual, startIndexReg, ImmWord(0),
                  &load_previous_character);
  masm_.movePtr(ImmWord('\n'), current_character_);
  masm_.jump(&start_regexp);

  masm_.bind(&load_previous_character);
  LoadCurrentCharacterUnchecked(-1, 1);
  masm_.bind(&start_regexp);

  // Initialize captured registers with inputStart - 1
  MOZ_ASSERT(num_capture_registers_ > 0);
  Register inputStartMinusOneReg = temp0_;
  masm_.loadPtr(inputStart(), inputStartMinusOneReg);
  masm_.subPtr(Imm32(char_size()), inputStartMinusOneReg);
  if (num_capture_registers_ > 8) {
    // Many registers: emit a loop over the register slots.
    masm_.movePtr(ImmWord(register_offset(0)), temp1_);
    js::jit::Label init_loop;
    masm_.bind(&init_loop);
    masm_.storePtr(inputStartMinusOneReg, BaseIndex(masm_.getStackPointer(),
                                                    temp1_, js::jit::TimesOne));
    masm_.addPtr(ImmWord(sizeof(void*)), temp1_);
    masm_.branchPtr(Assembler::LessThanOrEqual, temp1_,
                    ImmWord(register_offset(num_capture_registers_ - 1)),
                    &init_loop);
  } else {
    // Unroll the loop
    for (int i = 0; i < num_capture_registers_; i++) {
      masm_.storePtr(inputStartMinusOneReg, register_location(i));
    }
  }

  // Initialize backtrack stack pointer
  masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
                backtrack_stack_pointer_);
  masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
}

// Called when we find a match. May not be generated if we can
// determine ahead of time that a regexp cannot match: for example,
// when compiling /\u1e9e/ for latin-1 inputs.
void SMRegExpMacroAssembler::successHandler() {
  if (!success_label_.used()) {
    return;
  }
  masm_.bind(&success_label_);

  // Copy captures to the MatchPairs pointed to by the InputOutputData.
  // Captures are stored as positions, which are negative byte offsets
  // from the end of the string. We must convert them to actual
  // indices.
  //
  // Index:         [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
  // Pos (1-byte):  [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ]  // IS = -6
  // Pos (2-byte):  [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ]  // IS = -12
  //
  // To convert a position to an index, we subtract InputStart, and
  // divide the result by char_size.
  Register matchesReg = temp1_;
  masm_.loadPtr(matches(), matchesReg);

  // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
  // because we don't read from |backtrack_stack_pointer_| after this point.
  Register extraTemp = backtrack_stack_pointer_;

  Register inputStartReg = extraTemp;
  masm_.loadPtr(inputStart(), inputStartReg);

  auto copyRegister = [&](int reg) {
    masm_.loadPtr(register_location(reg), temp0_);
    masm_.subPtr(inputStartReg, temp0_);
    if (mode_ == UC16) {
      // Divide the byte offset by 2 to get a character index.
      masm_.rshiftPtrArithmetic(Imm32(1), temp0_);
    }
    masm_.store32(temp0_, Address(matchesReg, reg * sizeof(int32_t)));
  };

  // Copy first match pair.
  MOZ_ASSERT(num_capture_registers_ >= 2);
  copyRegister(0);
  copyRegister(1);

  if (num_capture_registers_ > 2) {
    // We always need the first match pair to update the `lastIndex` slot,
    // but we can skip copying the capture groups if we won't look at them.
    // This also allows our caller to avoid allocating space for unused results.
    js::jit::Label earlyExitForTest;
    masm_.branch32(Assembler::Equal, numMatches(), Imm32(1), &earlyExitForTest);

    for (int i = 2; i < num_capture_registers_; i++) {
      copyRegister(i);
    }

    masm_.bind(&earlyExitForTest);
  }

  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success)), temp0_);
  // This falls through to the exit handler.
}

void SMRegExpMacroAssembler::exitHandler() {
  masm_.bind(&exit_label_);

  // The run status was placed in temp0_; move it to the ABI return register.
  if (temp0_ != js::jit::ReturnReg) {
    masm_.movePtr(temp0_, js::jit::ReturnReg);
  }

  masm_.freeStack(frameSize_);

  // Restore registers which were saved on entry
  for (GeneralRegisterBackwardIterator iter(savedRegisters_); iter.more();
       ++iter) {
    masm_.Pop(*iter);
  }

  masm_.Pop(js::jit::FramePointer);

#ifdef JS_CODEGEN_ARM64
  // Now restore the value that was in the PSP register on entry, and return.

  // Obtain the correct SP from the PSP.
  masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);

  // Restore the saved value of the PSP register, this value is whatever the
  // caller had saved in it, not any actual SP value, and it must not be
  // overwritten subsequently.
  masm_.Ldr(js::jit::PseudoStackPointer64,
            vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));

  // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
  masm_.Ret(vixl::lr);
#else
  masm_.abiret();
#endif

  if (exit_with_exception_label_.used()) {
    masm_.bind(&exit_with_exception_label_);

    // Exit with an error result to signal thrown exception
    masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
    masm_.jump(&exit_label_);
  }
}

// Out-of-line target for the shared backtrack sequence.
void SMRegExpMacroAssembler::backtrackHandler() {
  if (!backtrack_label_.used()) {
    return;
  }
  masm_.bind(&backtrack_label_);
  Backtrack();
}

void SMRegExpMacroAssembler::stackOverflowHandler() {
  if (!stack_overflow_label_.used()) {
    return;
  }

  js::jit::AutoCreatedBy acb(masm_,
                             "SMRegExpMacroAssembler::stackOverflowHandler");

  // Called if the backtrack-stack limit has been hit.
  masm_.bind(&stack_overflow_label_);

  // Load argument
  masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);

  // Save registers before calling C function
  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());

#ifdef JS_USE_LINK_REGISTER
  masm_.pushReturnAddress();
#endif

  // Adjust for the return address on the stack.
  size_t frameOffset = sizeof(void*);

  // temp0_/temp1_ must survive (temp0_ carries the result, temp1_ the arg),
  // so exclude them from the save set.
  volatileRegs.takeUnchecked(temp0_);
  volatileRegs.takeUnchecked(temp1_);
  masm_.PushRegsInMask(volatileRegs);

  using Fn = bool (*)(RegExpStack* regexp_stack);
  masm_.setupUnalignedABICall(temp0_);
  masm_.passABIArg(temp1_);
  masm_.callWithABI<Fn, ::js::irregexp::GrowBacktrackStack>();
  masm_.storeCallBoolResult(temp0_);

  masm_.PopRegsInMask(volatileRegs);

  // If GrowBacktrackStack returned false, we have failed to grow the
  // stack, and must exit with a stack-overflow exception. Do this in
  // the caller so that the stack is adjusted by our return instruction.
  js::jit::Label overflow_return;
  masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);

  // Otherwise, store the new backtrack stack base and recompute the new
  // top of the stack.
  Address bsbAddress(masm_.getStackPointer(),
                     offsetof(FrameData, backtrackStackBase) + frameOffset);
  masm_.subPtr(bsbAddress, backtrack_stack_pointer_);

  masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
                temp1_);
  masm_.storePtr(temp1_, bsbAddress);
  masm_.addPtr(temp1_, backtrack_stack_pointer_);

  // Resume execution in calling code.
  masm_.bind(&overflow_return);
  masm_.ret();
}

// This is only used by tracing code.
// The return value doesn't matter.
1400 RegExpMacroAssembler::IrregexpImplementation 1401 SMRegExpMacroAssembler::Implementation() { 1402 return kBytecodeImplementation; 1403 } 1404 1405 // Compare two strings in `/i` mode (ignoreCase, but not unicode). 1406 /*static */ 1407 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode( 1408 const char16_t* substring1, const char16_t* substring2, size_t byteLength) { 1409 js::AutoUnsafeCallWithABI unsafe; 1410 1411 MOZ_ASSERT(byteLength % sizeof(char16_t) == 0); 1412 size_t length = byteLength / sizeof(char16_t); 1413 1414 for (size_t i = 0; i < length; i++) { 1415 char16_t c1 = substring1[i]; 1416 char16_t c2 = substring2[i]; 1417 if (c1 != c2) { 1418 #ifdef JS_HAS_INTL_API 1419 // Non-unicode regexps have weird case-folding rules. 1420 c1 = RegExpCaseFolding::Canonicalize(c1); 1421 c2 = RegExpCaseFolding::Canonicalize(c2); 1422 #else 1423 // If we aren't building with ICU, fall back to `/iu` mode. The only 1424 // differences are in corner cases. 1425 c1 = js::unicode::FoldCase(c1); 1426 c2 = js::unicode::FoldCase(c2); 1427 #endif 1428 if (c1 != c2) { 1429 return 0; 1430 } 1431 } 1432 } 1433 1434 return 1; 1435 } 1436 1437 // Compare two strings in `/iu` mode (ignoreCase and unicode). 1438 /*static */ 1439 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUnicode( 1440 const char16_t* substring1, const char16_t* substring2, size_t byteLength) { 1441 js::AutoUnsafeCallWithABI unsafe; 1442 1443 MOZ_ASSERT(byteLength % sizeof(char16_t) == 0); 1444 size_t length = byteLength / sizeof(char16_t); 1445 1446 for (size_t i = 0; i < length; i++) { 1447 char16_t c1 = substring1[i]; 1448 char16_t c2 = substring2[i]; 1449 if (c1 != c2) { 1450 // Unicode regexps use the common and simple case-folding 1451 // mappings of the Unicode Character Database. 
1452 c1 = js::unicode::FoldCase(c1); 1453 c2 = js::unicode::FoldCase(c2); 1454 if (c1 != c2) { 1455 return 0; 1456 } 1457 } 1458 } 1459 1460 return 1; 1461 } 1462 1463 /* static */ 1464 bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) { 1465 js::AutoUnsafeCallWithABI unsafe; 1466 size_t size = regexp_stack->memory_size(); 1467 return !!regexp_stack->EnsureCapacity(size * 2); 1468 } 1469 1470 bool SMRegExpMacroAssembler::CanReadUnaligned() const { 1471 #if defined(JS_CODEGEN_ARM) 1472 return !js::jit::ARMFlags::HasAlignmentFault(); 1473 #elif defined(JS_CODEGEN_MIPS64) 1474 return false; 1475 #else 1476 return true; 1477 #endif 1478 } 1479 1480 } // namespace internal 1481 } // namespace v8