tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RegExpNativeMacroAssembler.cpp (54945B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // Copyright 2020 the V8 project authors. All rights reserved.
      8 // Use of this source code is governed by a BSD-style license that can be
      9 // found in the LICENSE file.
     10 
     11 #include "irregexp/imported/regexp-macro-assembler-arch.h"
     12 #include "irregexp/imported/regexp-stack.h"
     13 #include "irregexp/imported/special-case.h"
     14 #include "jit/Linker.h"
     15 #include "jit/PerfSpewer.h"
     16 #include "vm/MatchPairs.h"
     17 #include "vm/Realm.h"
     18 #ifdef MOZ_VTUNE
     19 #  include "vtune/VTuneWrapper.h"
     20 #endif
     21 
     22 #include "jit/ABIFunctionList-inl.h"
     23 #include "jit/MacroAssembler-inl.h"
     24 
     25 namespace v8 {
     26 namespace internal {
     27 
     28 using js::MatchPairs;
     29 using js::jit::AbsoluteAddress;
     30 using js::jit::Address;
     31 using js::jit::AllocatableGeneralRegisterSet;
     32 using js::jit::Assembler;
     33 using js::jit::BaseIndex;
     34 using js::jit::CodeLocationLabel;
     35 using js::jit::GeneralRegisterBackwardIterator;
     36 using js::jit::GeneralRegisterForwardIterator;
     37 using js::jit::GeneralRegisterSet;
     38 using js::jit::Imm32;
     39 using js::jit::ImmPtr;
     40 using js::jit::ImmWord;
     41 using js::jit::JitCode;
     42 using js::jit::Linker;
     43 using js::jit::LiveGeneralRegisterSet;
     44 using js::jit::Register;
     45 using js::jit::Registers;
     46 using js::jit::StackMacroAssembler;
     47 
     48 SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx,
     49                                               StackMacroAssembler& masm,
     50                                               Zone* zone, Mode mode,
     51                                               uint32_t num_capture_registers)
     52    : NativeRegExpMacroAssembler(cx->isolate.ref(), zone),
     53      cx_(cx),
     54      masm_(masm),
     55      mode_(mode),
     56      num_registers_(num_capture_registers),
     57      num_capture_registers_(num_capture_registers) {
     58  // Each capture has a start and an end register
     59  MOZ_ASSERT(num_capture_registers_ % 2 == 0);
     60 
     61  AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
     62 
     63  input_end_pointer_ = regs.takeAny();
     64  current_character_ = regs.takeAny();
     65  current_position_ = regs.takeAny();
     66  backtrack_stack_pointer_ = regs.takeAny();
     67  temp0_ = regs.takeAny();
     68  temp1_ = regs.takeAny();
     69  if (!regs.empty()) {
     70    // Not enough registers on x86.
     71    temp2_ = regs.takeAny();
     72  }
     73  savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs);
     74 
     75  masm_.jump(&entry_label_);  // We'll generate the entry code later
     76  masm_.bind(&start_label_);  // and continue from here.
     77 }
     78 
     79 int SMRegExpMacroAssembler::stack_limit_slack_slot_count() {
     80  return RegExpStack::kStackLimitSlackSlotCount;
     81 }
     82 
     83 void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) {
     84  if (by != 0) {
     85    masm_.addPtr(Imm32(by * char_size()), current_position_);
     86  }
     87 }
     88 
     89 void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) {
     90  MOZ_ASSERT(reg >= 0 && reg < num_registers_);
     91  if (by != 0) {
     92    masm_.addPtr(Imm32(by), register_location(reg));
     93  }
     94 }
     95 
     96 void SMRegExpMacroAssembler::Backtrack() {
     97 #ifdef DEBUG
     98  js::jit::Label bailOut;
     99  // Check for simulating interrupt
    100  masm_.branch32(Assembler::NotEqual,
    101                 AbsoluteAddress(&cx_->isolate->shouldSimulateInterrupt_),
    102                 Imm32(0), &bailOut);
    103 #endif
    104  // Check for an interrupt. We have to restart from the beginning if we
    105  // are interrupted, so we only check for urgent interrupts.
    106  js::jit::Label noInterrupt;
    107  masm_.branchTest32(
    108      Assembler::Zero, AbsoluteAddress(cx_->addressOfInterruptBits()),
    109      Imm32(uint32_t(js::InterruptReason::CallbackUrgent)), &noInterrupt);
    110 #ifdef DEBUG
    111  // bailing out if we have simulating interrupt flag set
    112  masm_.bind(&bailOut);
    113 #endif
    114  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
    115  masm_.jump(&exit_label_);
    116  masm_.bind(&noInterrupt);
    117 
    118  // Pop code offset from backtrack stack, add to code base address, and jump to
    119  // location.
    120  Pop(temp0_);
    121  PushBacktrackCodeOffsetPatch(masm_.movWithPatch(ImmPtr(nullptr), temp1_));
    122  masm_.addPtr(temp1_, temp0_);
    123  masm_.jump(temp0_);
    124 }
    125 
    126 void SMRegExpMacroAssembler::Bind(Label* label) {
    127  masm_.bind(label->inner());
    128  if (label->patchOffset_.bound()) {
    129    AddLabelPatch(label->patchOffset_, label->pos());
    130  }
    131 }
    132 
    133 // Check if current_position + cp_offset is the input start
    134 void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
    135                                              Assembler::Condition cond) {
    136  Address addr(current_position_, cp_offset * char_size());
    137  masm_.computeEffectiveAddress(addr, temp0_);
    138 
    139  masm_.branchPtr(cond, inputStart(), temp0_, LabelOrBacktrack(on_cond));
    140 }
    141 
    142 void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) {
    143  CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal);
    144 }
    145 
    146 void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset,
    147                                             Label* on_not_at_start) {
    148  CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual);
    149 }
    150 
    151 void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond,
    152                                                Assembler::Condition cond) {
    153  masm_.branch32(cond, current_character_, c, LabelOrBacktrack(on_cond));
    154 }
    155 
    156 void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) {
    157  CheckCharacterImpl(Imm32(c), on_equal, Assembler::Equal);
    158 }
    159 
    160 void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
    161                                               Label* on_not_equal) {
    162  CheckCharacterImpl(Imm32(c), on_not_equal, Assembler::NotEqual);
    163 }
    164 
    165 void SMRegExpMacroAssembler::CheckCharacterGT(base::uc16 limit,
    166                                              Label* on_greater) {
    167  CheckCharacterImpl(Imm32(limit), on_greater, Assembler::GreaterThan);
    168 }
    169 
    170 void SMRegExpMacroAssembler::CheckCharacterLT(base::uc16 limit,
    171                                              Label* on_less) {
    172  CheckCharacterImpl(Imm32(limit), on_less, Assembler::LessThan);
    173 }
    174 
    175 // Bitwise-and the current character with mask and then check for a
    176 // match with c.
    177 void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c,
    178                                                        uint32_t mask,
    179                                                        Label* on_cond,
    180                                                        bool is_not) {
    181  if (c == 0) {
    182    Assembler::Condition cond = is_not ? Assembler::NonZero : Assembler::Zero;
    183    masm_.branchTest32(cond, current_character_, Imm32(mask),
    184                       LabelOrBacktrack(on_cond));
    185  } else {
    186    Assembler::Condition cond = is_not ? Assembler::NotEqual : Assembler::Equal;
    187    masm_.move32(Imm32(mask), temp0_);
    188    masm_.and32(current_character_, temp0_);
    189    masm_.branch32(cond, temp0_, Imm32(c), LabelOrBacktrack(on_cond));
    190  }
    191 }
    192 
    193 void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
    194                                                    Label* on_equal) {
    195  CheckCharacterAfterAndImpl(c, mask, on_equal, /*is_not =*/false);
    196 }
    197 
    198 void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c,
    199                                                       uint32_t mask,
    200                                                       Label* on_not_equal) {
    201  CheckCharacterAfterAndImpl(c, mask, on_not_equal, /*is_not =*/true);
    202 }
    203 
    204 // Subtract minus from the current character, then bitwise-and the
    205 // result with mask, then check for a match with c.
    206 void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
    207    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
    208  masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_);
    209  if (c == 0) {
    210    masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask),
    211                       LabelOrBacktrack(on_not_equal));
    212  } else {
    213    masm_.and32(Imm32(mask), temp0_);
    214    masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c),
    215                   LabelOrBacktrack(on_not_equal));
    216  }
    217 }
    218 
    219 // If the current position matches the position stored on top of the backtrack
    220 // stack, pops the backtrack stack and branches to the given label.
    221 void SMRegExpMacroAssembler::CheckFixedLengthLoop(Label* on_equal) {
    222  js::jit::Label fallthrough;
    223  masm_.load32SignExtendToPtr(Address(backtrack_stack_pointer_, 0), temp0_);
    224  masm_.branchPtr(Assembler::NotEqual, temp0_, current_position_, &fallthrough);
    225  masm_.addPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_);  // Pop.
    226  JumpOrBacktrack(on_equal);
    227  masm_.bind(&fallthrough);
    228 }
    229 
    230 void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
    231    base::uc16 from, base::uc16 to, Label* on_cond, Assembler::Condition cond) {
    232  // x is in [from,to] if unsigned(x - from) <= to - from
    233  masm_.computeEffectiveAddress(Address(current_character_, -from), temp0_);
    234  masm_.branch32(cond, temp0_, Imm32(to - from), LabelOrBacktrack(on_cond));
    235 }
    236 
    237 void SMRegExpMacroAssembler::CheckCharacterInRange(base::uc16 from,
    238                                                   base::uc16 to,
    239                                                   Label* on_in_range) {
    240  CheckCharacterInRangeImpl(from, to, on_in_range, Assembler::BelowOrEqual);
    241 }
    242 
    243 void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from,
    244                                                      base::uc16 to,
    245                                                      Label* on_not_in_range) {
    246  CheckCharacterInRangeImpl(from, to, on_not_in_range, Assembler::Above);
    247 }
    248 
    249 /* static */
    250 bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c,
    251                                                     ByteArrayData* ranges) {
    252  js::AutoUnsafeCallWithABI unsafe;
    253  MOZ_ASSERT(ranges->length() % sizeof(uint16_t) == 0);
    254  uint32_t length = ranges->length() / sizeof(uint16_t);
    255  MOZ_ASSERT(length > 0);
    256 
    257  // Fast paths.
    258  if (c < ranges->getTyped<uint16_t>(0)) {
    259    // |c| is lower than the start of the first range.
    260    // It is not in the range array.
    261    return false;
    262  }
    263  if (c >= ranges->getTyped<uint16_t>(length - 1)) {
    264    // |c| is higher than the last entry. If the table contains an odd
    265    // number of entries, the last range is open-ended, so |c| is in
    266    // the range array iff |length| is odd.
    267    return (length % 2) != 0;
    268  }
    269 
    270  // |ranges| is stored as an interval list: an ordered list of
    271  // starting points, where every even index marks the beginning of a
    272  // range of characters that are included, and every odd index marks
    273  // the beginning of a range of characters that are excluded. For
    274  // example, the set [1,2,3,7,8,9] would be represented as the
    275  // range array [1,4,7,10]. If |ranges| has an odd number of entries,
    276  // the last included range is open-ended (so the set containing
    277  // every character would be represented as [0]).
    278  //
    279  // Because of the symmetry between included and excluded ranges, we
    280  // can do a binary search for the index in |ranges| with the value
    281  // closest to but not exceeding |c|. If that index is even, |c| is
    282  // in an included range. If that index is odd, |c| is in an excluded
    283  // range.
    284  uint32_t lower = 0;
    285  uint32_t upper = length;
    286  uint32_t mid = 0;
    287  do {
    288    mid = lower + (upper - lower) / 2;
    289    const base::uc16 elem = ranges->getTyped<uint16_t>(mid);
    290    if (c < elem) {
    291      upper = mid;
    292    } else if (c > elem) {
    293      lower = mid + 1;
    294    } else {
    295      break;
    296    }
    297  } while (lower < upper);
    298  uint32_t rangeIndex = c < ranges->getTyped<uint16_t>(mid) ? mid - 1 : mid;
    299 
    300  // Included ranges start at even indices and end at odd indices.
    301  return rangeIndex % 2 == 0;
    302 }
    303 
    304 void SMRegExpMacroAssembler::CallIsCharacterInRangeArray(
    305    const ZoneList<CharacterRange>* ranges) {
    306  Handle<ByteArray> rangeArray = GetOrAddRangeArray(ranges);
    307  masm_.movePtr(ImmPtr(rangeArray->inner()), temp0_);
    308 
    309  // Save volatile regs. Temp regs don't need to be saved.
    310  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
    311  volatileRegs.takeUnchecked(temp0_);
    312  volatileRegs.takeUnchecked(temp1_);
    313  if (temp2_ != js::jit::InvalidReg) {
    314    volatileRegs.takeUnchecked(temp2_);
    315  }
    316  masm_.PushRegsInMask(volatileRegs);
    317 
    318  using Fn = bool (*)(uint32_t, ByteArrayData*);
    319  masm_.setupUnalignedABICall(temp1_);
    320  masm_.passABIArg(current_character_);
    321  masm_.passABIArg(temp0_);
    322 
    323  masm_.callWithABI<Fn, ::js::irregexp::IsCharacterInRangeArray>();
    324  masm_.storeCallBoolResult(temp1_);
    325  masm_.PopRegsInMask(volatileRegs);
    326 
    327  // GetOrAddRangeArray caches previously seen range arrays to reduce
    328  // memory usage, so this may not be the first time we've seen this
    329  // range array. We only need to transfer ownership from the
    330  // HandleScope to the |tables_| vector once.
    331  PseudoHandle<ByteArrayData> rawRangeArray =
    332      rangeArray->maybeTakeOwnership(isolate());
    333  if (rawRangeArray) {
    334    AddTable(std::move(rawRangeArray));
    335  }
    336 }
    337 
    338 bool SMRegExpMacroAssembler::CheckCharacterInRangeArray(
    339    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
    340  CallIsCharacterInRangeArray(ranges);
    341  masm_.branchTest32(Assembler::NonZero, temp1_, temp1_,
    342                     LabelOrBacktrack(on_in_range));
    343  return true;
    344 }
    345 
    346 bool SMRegExpMacroAssembler::CheckCharacterNotInRangeArray(
    347    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
    348  CallIsCharacterInRangeArray(ranges);
    349  masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
    350                     LabelOrBacktrack(on_not_in_range));
    351  return true;
    352 }
    353 
    354 void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
    355                                             Label* on_bit_set) {
    356  // Claim ownership of the ByteArray from the current HandleScope.
    357  // ByteArrays are allocated on the C++ heap and are (eventually)
    358  // owned by the RegExpShared.
    359  PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate());
    360 
    361  masm_.movePtr(ImmPtr(rawTable->data()), temp0_);
    362 
    363  masm_.move32(Imm32(kTableMask), temp1_);
    364  masm_.and32(current_character_, temp1_);
    365 
    366  masm_.load8ZeroExtend(BaseIndex(temp0_, temp1_, js::jit::TimesOne), temp0_);
    367  masm_.branchTest32(Assembler::NonZero, temp0_, temp0_,
    368                     LabelOrBacktrack(on_bit_set));
    369 
    370  // Transfer ownership of |rawTable| to the |tables_| vector.
    371  AddTable(std::move(rawTable));
    372 }
    373 
    374 void SMRegExpMacroAssembler::SkipUntilBitInTable(int cp_offset,
    375                                                 Handle<ByteArray> table,
    376                                                 Handle<ByteArray> nibble_table,
    377                                                 int advance_by) {
    378  // Claim ownership of the ByteArray from the current HandleScope.
    379  // ByteArrays are allocated on the C++ heap and are (eventually)
    380  // owned by the RegExpShared.
    381  PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate());
    382 
    383  // TODO: SIMD support (bug 1928862).
    384  MOZ_ASSERT(!SkipUntilBitInTableUseSimd(advance_by));
    385 
    386  // Scalar version.
    387  Register tableReg = temp0_;
    388  masm_.movePtr(ImmPtr(rawTable->data()), tableReg);
    389 
    390  Label cont;
    391  js::jit::Label scalarRepeat;
    392  masm_.bind(&scalarRepeat);
    393  CheckPosition(cp_offset, &cont);
    394  LoadCurrentCharacterUnchecked(cp_offset, 1);
    395 
    396  Register index = current_character_;
    397  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
    398    index = temp1_;
    399    masm_.and32(Imm32(kTableMask), current_character_, index);
    400  }
    401 
    402  masm_.load8ZeroExtend(BaseIndex(tableReg, index, js::jit::TimesOne), index);
    403  masm_.branchTest32(Assembler::NonZero, index, index, cont.inner());
    404  AdvanceCurrentPosition(advance_by);
    405  masm_.jump(&scalarRepeat);
    406 
    407  masm_.bind(cont.inner());
    408 
    409  // Transfer ownership of |rawTable| to the |tables_| vector.
    410  AddTable(std::move(rawTable));
    411 }
    412 
    413 bool SMRegExpMacroAssembler::SkipUntilBitInTableUseSimd(int advance_by) {
    414  // V8 found that using SIMD instead of the scalar version was only
    415  // faster when we are advancing by 1 byte per iteration.
    416  bool simdEnabled = false;
    417  return simdEnabled && advance_by * char_size() == 1;
    418 }
    419 
    420 void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg,
    421                                                       bool read_backward,
    422                                                       bool unicode,
    423                                                       Label* on_no_match,
    424                                                       bool ignore_case) {
    425  js::jit::Label fallthrough;
    426 
    427  // Captures are stored as a sequential pair of registers.
    428  // Find the length of the back-referenced capture and load the
    429  // capture's start index into current_character_.
    430  masm_.loadPtr(register_location(start_reg),  // index of start
    431                current_character_);
    432  masm_.loadPtr(register_location(start_reg + 1), temp0_);  // index of end
    433  masm_.subPtr(current_character_, temp0_);                 // length of capture
    434 
    435  // Capture registers are either both set or both cleared.
    436  // If the capture length is zero, then the capture is either empty or cleared.
    437  // Fall through in both cases.
    438  masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough);
    439 
    440  // Check that there are sufficient characters left in the input.
    441  if (read_backward) {
    442    // If start + len > current, there isn't enough room for a
    443    // lookbehind backreference.
    444    masm_.loadPtr(inputStart(), temp1_);
    445    masm_.addPtr(temp0_, temp1_);
    446    masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_,
    447                    LabelOrBacktrack(on_no_match));
    448  } else {
    449    // current_position_ is the negative offset from the end.
    450    // If current + len > 0, there isn't enough room for a backreference.
    451    masm_.movePtr(current_position_, temp1_);
    452    masm_.addPtr(temp0_, temp1_);
    453    masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0),
    454                    LabelOrBacktrack(on_no_match));
    455  }
    456 
    457  if (mode_ == UC16 && ignore_case) {
    458    // We call a helper function for case-insensitive non-latin1 strings.
    459 
    460    // Save volatile regs. temp1_, temp2_, and current_character_
    461    // don't need to be saved.  current_position_ needs to be saved
    462    // even if it's non-volatile, because we modify it to use as an argument.
    463    LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
    464    volatileRegs.addUnchecked(current_position_);
    465    volatileRegs.takeUnchecked(temp1_);
    466    if (temp2_ != js::jit::InvalidReg) {
    467      volatileRegs.takeUnchecked(temp2_);
    468    }
    469    volatileRegs.takeUnchecked(current_character_);
    470    masm_.PushRegsInMask(volatileRegs);
    471 
    472    // Parameters are
    473    //   Address captured - Address of captured substring's start.
    474    //   Address current - Address of current character position.
    475    //   size_t byte_length - length of capture (in bytes)
    476 
    477    // Compute |captured|
    478    masm_.addPtr(input_end_pointer_, current_character_);
    479 
    480    // Compute |current|
    481    masm_.addPtr(input_end_pointer_, current_position_);
    482    if (read_backward) {
    483      // Offset by length when matching backwards.
    484      masm_.subPtr(temp0_, current_position_);
    485    }
    486 
    487    using Fn = uint32_t (*)(const char16_t*, const char16_t*, size_t);
    488    masm_.setupUnalignedABICall(temp1_);
    489    masm_.passABIArg(current_character_);
    490    masm_.passABIArg(current_position_);
    491    masm_.passABIArg(temp0_);
    492 
    493    if (unicode) {
    494      masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareUnicode>();
    495    } else {
    496      masm_.callWithABI<Fn, ::js::irregexp::CaseInsensitiveCompareNonUnicode>();
    497    }
    498    masm_.storeCallInt32Result(temp1_);
    499    masm_.PopRegsInMask(volatileRegs);
    500    masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
    501                       LabelOrBacktrack(on_no_match));
    502 
    503    // On success, advance position by length of capture
    504    if (read_backward) {
    505      masm_.subPtr(temp0_, current_position_);
    506    } else {
    507      masm_.addPtr(temp0_, current_position_);
    508    }
    509 
    510    masm_.bind(&fallthrough);
    511    return;
    512  }
    513 
    514  // We will be modifying current_position_. Save it in case the match fails.
    515  masm_.push(current_position_);
    516 
    517  // Compute start of capture string
    518  masm_.addPtr(input_end_pointer_, current_character_);
    519 
    520  // Compute start of match string
    521  masm_.addPtr(input_end_pointer_, current_position_);
    522  if (read_backward) {
    523    // Offset by length when matching backwards.
    524    masm_.subPtr(temp0_, current_position_);
    525  }
    526 
    527  // Compute end of match string
    528  masm_.addPtr(current_position_, temp0_);
    529 
    530  Register nextCaptureChar = temp1_;
    531  Register nextMatchChar = temp2_;
    532 
    533  if (temp2_ == js::jit::InvalidReg) {
    534    masm_.push(backtrack_stack_pointer_);
    535    nextMatchChar = backtrack_stack_pointer_;
    536  }
    537 
    538  js::jit::Label success;
    539  js::jit::Label fail;
    540  js::jit::Label loop;
    541  masm_.bind(&loop);
    542 
    543  // Load next character from each string.
    544  if (mode_ == LATIN1) {
    545    masm_.load8ZeroExtend(Address(current_character_, 0), nextCaptureChar);
    546    masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar);
    547  } else {
    548    masm_.load16ZeroExtend(Address(current_character_, 0), nextCaptureChar);
    549    masm_.load16ZeroExtend(Address(current_position_, 0), nextMatchChar);
    550  }
    551 
    552  if (ignore_case) {
    553    MOZ_ASSERT(mode_ == LATIN1);
    554    // Try exact match.
    555    js::jit::Label loop_increment;
    556    masm_.branch32(Assembler::Equal, nextCaptureChar, nextMatchChar,
    557                   &loop_increment);
    558 
    559    // Mismatch. Try case-insensitive match.
    560    // Force the capture character to lower case (by setting bit 0x20)
    561    // then check to see if it is a letter.
    562    js::jit::Label convert_match;
    563    masm_.or32(Imm32(0x20), nextCaptureChar);
    564 
    565    // Check if it is in [a,z].
    566    masm_.computeEffectiveAddress(Address(nextCaptureChar, -'a'),
    567                                  nextMatchChar);
    568    masm_.branch32(Assembler::BelowOrEqual, nextMatchChar, Imm32('z' - 'a'),
    569                   &convert_match);
    570    // Check for values in range [224,254].
    571    // Exclude 247 (U+00F7 DIVISION SIGN).
    572    masm_.sub32(Imm32(224 - 'a'), nextMatchChar);
    573    masm_.branch32(Assembler::Above, nextMatchChar, Imm32(254 - 224), &fail);
    574    masm_.branch32(Assembler::Equal, nextMatchChar, Imm32(247 - 224), &fail);
    575 
    576    // Capture character is lower case. Convert match character
    577    // to lower case and compare.
    578    masm_.bind(&convert_match);
    579    masm_.load8ZeroExtend(Address(current_position_, 0), nextMatchChar);
    580    masm_.or32(Imm32(0x20), nextMatchChar);
    581    masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail);
    582 
    583    masm_.bind(&loop_increment);
    584  } else {
    585    // Fail if characters do not match.
    586    masm_.branch32(Assembler::NotEqual, nextCaptureChar, nextMatchChar, &fail);
    587  }
    588 
    589  // Increment pointers into match and capture strings.
    590  masm_.addPtr(Imm32(char_size()), current_character_);
    591  masm_.addPtr(Imm32(char_size()), current_position_);
    592 
    593  // Loop if we have not reached the end of the match string.
    594  masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop);
    595  masm_.jump(&success);
    596 
    597  // If we fail, restore current_position_ and branch.
    598  masm_.bind(&fail);
    599  if (temp2_ == js::jit::InvalidReg) {
    600    // Restore backtrack_stack_pointer_ when it was used as a temp register.
    601    masm_.pop(backtrack_stack_pointer_);
    602  }
    603  masm_.pop(current_position_);
    604  JumpOrBacktrack(on_no_match);
    605 
    606  masm_.bind(&success);
    607 
    608  if (temp2_ == js::jit::InvalidReg) {
    609    // Restore backtrack_stack_pointer_ when it was used as a temp register.
    610    masm_.pop(backtrack_stack_pointer_);
    611  }
    612  // Drop saved value of current_position_
    613  masm_.addToStackPtr(Imm32(sizeof(uintptr_t)));
    614 
    615  // current_position_ is a pointer. Convert it back to an offset.
    616  masm_.subPtr(input_end_pointer_, current_position_);
    617  if (read_backward) {
    618    // Subtract match length if we matched backward
    619    masm_.addPtr(register_location(start_reg), current_position_);
    620    masm_.subPtr(register_location(start_reg + 1), current_position_);
    621  }
    622 
    623  masm_.bind(&fallthrough);
    624 }
    625 
    626 // Branch if a back-reference does not match a previous capture.
    627 void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg,
    628                                                   bool read_backward,
    629                                                   Label* on_no_match) {
    630  CheckNotBackReferenceImpl(start_reg, read_backward, /*unicode = */ false,
    631                            on_no_match, /*ignore_case = */ false);
    632 }
    633 
    634 void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
    635    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
    636  CheckNotBackReferenceImpl(start_reg, read_backward, unicode, on_no_match,
    637                            /*ignore_case = */ true);
    638 }
    639 
    640 // Checks whether the given offset from the current position is
    641 // inside the input string.
    642 void SMRegExpMacroAssembler::CheckPosition(int cp_offset,
    643                                           Label* on_outside_input) {
    644  // Note: current_position_ is a (negative) byte offset relative to
    645  // the end of the input string.
    646  if (cp_offset >= 0) {
    647    //      end + current + offset >= end
    648    // <=>        current + offset >= 0
    649    // <=>        current          >= -offset
    650    masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
    651                    ImmWord(-cp_offset * char_size()),
    652                    LabelOrBacktrack(on_outside_input));
    653  } else {
    654    // Compute offset position
    655    masm_.computeEffectiveAddress(
    656        Address(current_position_, cp_offset * char_size()), temp0_);
    657 
    658    // Compare to start of input.
    659    masm_.branchPtr(Assembler::GreaterThan, inputStart(), temp0_,
    660                    LabelOrBacktrack(on_outside_input));
    661  }
    662 }
    663 
    664 // This function attempts to generate special case code for character classes.
    665 // Returns true if a special case is generated.
    666 // Otherwise returns false and generates no code.
    667 bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(
    668    StandardCharacterSet type, Label* on_no_match) {
    669  js::jit::Label* no_match = LabelOrBacktrack(on_no_match);
    670 
    671  // Note: throughout this function, range checks (c in [min, max])
    672  // are implemented by an unsigned (c - min) <= (max - min) check.
    673  switch (type) {
    674    case StandardCharacterSet::kWhitespace: {
    675      // Match space-characters
    676      if (mode_ != LATIN1) {
    677        return false;
    678      }
    679      js::jit::Label success;
    680      // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP).
    681 
    682      // Check ' '
    683      masm_.branch32(Assembler::Equal, current_character_, Imm32(' '),
    684                     &success);
    685 
    686      // Check '\t'..'\r'
    687      masm_.computeEffectiveAddress(Address(current_character_, -'\t'), temp0_);
    688      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'),
    689                     &success);
    690 
    691      // Check \u00a0.
    692      masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'),
    693                     no_match);
    694 
    695      masm_.bind(&success);
    696      return true;
    697    }
    698    case StandardCharacterSet::kNotWhitespace:
    699      // The emitted code for generic character classes is good enough.
    700      return false;
    701    case StandardCharacterSet::kDigit:
    702      // Match latin1 digits ('0'-'9')
    703      masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
    704      masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match);
    705      return true;
    706    case StandardCharacterSet::kNotDigit:
    707      // Match anything except latin1 digits ('0'-'9')
    708      masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
    709      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'),
    710                     no_match);
    711      return true;
    712    case StandardCharacterSet::kNotLineTerminator:
    713      // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d),
    714      // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR.
    715      // See https://tc39.es/ecma262/#prod-LineTerminator
    716 
    717      // To test for 0x0a and 0x0d efficiently, we XOR the input with 1.
    718      // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to
    719      // test for the contiguous range 0x0b..0x0c.
    720      masm_.xor32(Imm32(0x01), current_character_, temp0_);
    721      masm_.sub32(Imm32(0x0b), temp0_);
    722      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
    723                     no_match);
    724 
    725      if (mode_ == UC16) {
    726        // Compare original value to 0x2028 and 0x2029, using the already
    727        // computed (current_char ^ 0x01 - 0x0b). I.e., check for
    728        // 0x201d (0x2028 - 0x0b) or 0x201e.
    729        masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
    730        masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028),
    731                       no_match);
    732      }
    733      return true;
    734    case StandardCharacterSet::kWord:
    735      // \w matches the set of 63 characters defined in Runtime Semantics:
    736      // WordCharacters. We use a static lookup table, which is defined in
    737      // regexp-macro-assembler.cc.
    738      // Note: if both Unicode and IgnoreCase are true, \w matches a
    739      // larger set of characters. That case is handled elsewhere.
    740      if (mode_ != LATIN1) {
    741        masm_.branch32(Assembler::Above, current_character_, Imm32('z'),
    742                       no_match);
    743      }
    744      static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
    745      masm_.movePtr(ImmPtr(word_character_map), temp0_);
    746      masm_.load8ZeroExtend(
    747          BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
    748      masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match);
    749      return true;
    750    case StandardCharacterSet::kNotWord: {
    751      // See 'w' above.
    752      js::jit::Label done;
    753      if (mode_ != LATIN1) {
    754        masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done);
    755      }
    756      static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
    757      masm_.movePtr(ImmPtr(word_character_map), temp0_);
    758      masm_.load8ZeroExtend(
    759          BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
    760      masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match);
    761      if (mode_ != LATIN1) {
    762        masm_.bind(&done);
    763      }
    764      return true;
    765    }
    766      ////////////////////////////////////////////////////////////////////////
    767      // Non-standard classes (with no syntactic shorthand) used internally //
    768      ////////////////////////////////////////////////////////////////////////
    769    case StandardCharacterSet::kEverything:
    770      // Match any character
    771      return true;
    772    case StandardCharacterSet::kLineTerminator:
    773      // Match newlines. The opposite of '.'. See '.' above.
    774      masm_.xor32(Imm32(0x01), current_character_, temp0_);
    775      masm_.sub32(Imm32(0x0b), temp0_);
    776      if (mode_ == LATIN1) {
    777        masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match);
    778      } else {
    779        MOZ_ASSERT(mode_ == UC16);
    780        js::jit::Label done;
    781        masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
    782                       &done);
    783 
    784        // Compare original value to 0x2028 and 0x2029, using the already
    785        // computed (current_char ^ 0x01 - 0x0b). I.e., check for
    786        // 0x201d (0x2028 - 0x0b) or 0x201e.
    787        masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
    788        masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
    789                       no_match);
    790        masm_.bind(&done);
    791      }
    792      return true;
    793  }
    794  return false;
    795 }
    796 
    797 void SMRegExpMacroAssembler::Fail() {
    798  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success_NotFound)),
    799                temp0_);
    800  masm_.jump(&exit_label_);
    801 }
    802 
    803 void SMRegExpMacroAssembler::GoTo(Label* to) {
    804  masm_.jump(LabelOrBacktrack(to));
    805 }
    806 
    807 void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
    808                                          Label* if_ge) {
    809  masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg),
    810                  ImmWord(comparand), LabelOrBacktrack(if_ge));
    811 }
    812 
    813 void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
    814                                          Label* if_lt) {
    815  masm_.branchPtr(Assembler::LessThan, register_location(reg),
    816                  ImmWord(comparand), LabelOrBacktrack(if_lt));
    817 }
    818 
    819 void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
    820  masm_.branchPtr(Assembler::Equal, register_location(reg), current_position_,
    821                  LabelOrBacktrack(if_eq));
    822 }
    823 
// This is a word-for-word identical copy of the V8 code, which is
// duplicated in at least nine different places in V8 (one per
// supported architecture) with no differences outside of comments and
// formatting. It should be hoisted into the superclass. Once that is
// done upstream, this version can be deleted.
//
// Optionally emits a position check, then loads |characters| characters
// starting at |cp_offset| (relative to the current position) into the
// current-character register.
//
//   cp_offset        - character offset from the current position; may be
//                      negative.
//   on_end_of_input  - label passed to CheckPosition when |check_bounds| is
//                      set.
//   check_bounds     - when false, no position check is emitted.
//   characters       - number of characters to load.
//   eats_at_least    - must be >= |characters|; used to widen the position
//                      check for non-negative offsets.
void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
                                                      Label* on_end_of_input,
                                                      bool check_bounds,
                                                      int characters,
                                                      int eats_at_least) {
  // It's possible to preload a small number of characters when each success
  // path requires a large number of characters, but not the reverse.
  MOZ_ASSERT(eats_at_least >= characters);
  MOZ_ASSERT(cp_offset < (1 << 30));  // Be sane! (And ensure negation works)

  if (check_bounds) {
    if (cp_offset >= 0) {
      // Check the furthest offset covered by |eats_at_least| characters,
      // not just the first character being loaded.
      CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
    } else {
      // Negative offset: only |cp_offset| itself is checked.
      CheckPosition(cp_offset, on_end_of_input);
    }
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}
    848 
    849 // Load the character (or characters) at the specified offset from the
    850 // current position. Zero-extend to 32 bits.
    851 void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
    852                                                           int characters) {
    853  BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne,
    854                    cp_offset * char_size());
    855  if (mode_ == LATIN1) {
    856    if (characters == 4) {
    857      masm_.load32(address, current_character_);
    858    } else if (characters == 2) {
    859      masm_.load16ZeroExtend(address, current_character_);
    860    } else {
    861      MOZ_ASSERT(characters == 1);
    862      masm_.load8ZeroExtend(address, current_character_);
    863    }
    864  } else {
    865    MOZ_ASSERT(mode_ == UC16);
    866    if (characters == 2) {
    867      masm_.load32(address, current_character_);
    868    } else {
    869      MOZ_ASSERT(characters == 1);
    870      masm_.load16ZeroExtend(address, current_character_);
    871    }
    872  }
    873 }
    874 
    875 void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_); }
    876 
    877 void SMRegExpMacroAssembler::PopRegister(int register_index) {
    878  Pop(temp0_);
    879  masm_.storePtr(temp0_, register_location(register_index));
    880 }
    881 
    882 void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
    883  MOZ_ASSERT(!label->is_bound());
    884  MOZ_ASSERT(!label->patchOffset_.bound());
    885  label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), temp0_);
    886  MOZ_ASSERT(label->patchOffset_.bound());
    887 
    888  Push(temp0_);
    889 
    890  CheckBacktrackStackLimit();
    891 }
    892 
    893 void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_); }
    894 
    895 void SMRegExpMacroAssembler::PushRegister(int register_index,
    896                                          StackCheckFlag check_stack_limit) {
    897  masm_.loadPtr(register_location(register_index), temp0_);
    898  Push(temp0_);
    899  if (check_stack_limit) {
    900    CheckBacktrackStackLimit();
    901  }
    902 }
    903 
    904 void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
    905  masm_.loadPtr(register_location(reg), current_position_);
    906 }
    907 
    908 void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
    909                                                            int cp_offset) {
    910  if (cp_offset == 0) {
    911    masm_.storePtr(current_position_, register_location(reg));
    912  } else {
    913    Address addr(current_position_, cp_offset * char_size());
    914    masm_.computeEffectiveAddress(addr, temp0_);
    915    masm_.storePtr(temp0_, register_location(reg));
    916  }
    917 }
    918 
    919 // Note: The backtrack stack pointer is stored in a register as an
    920 // offset from the stack top, not as a bare pointer, so that it is not
    921 // corrupted if the backtrack stack grows (and therefore moves).
    922 void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
    923  masm_.loadPtr(register_location(reg), backtrack_stack_pointer_);
    924  masm_.addPtr(backtrackStackBase(), backtrack_stack_pointer_);
    925 }
    926 void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
    927  masm_.movePtr(backtrack_stack_pointer_, temp0_);
    928  masm_.subPtr(backtrackStackBase(), temp0_);
    929  masm_.storePtr(temp0_, register_location(reg));
    930 }
    931 
    932 // When matching a regexp that is anchored at the end, this operation
    933 // is used to try skipping the beginning of long strings. If the
    934 // maximum length of a match is less than the length of the string, we
    935 // can skip the initial len - max_len bytes.
    936 void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
    937  js::jit::Label after_position;
    938  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
    939                  ImmWord(-by * char_size()), &after_position);
    940  masm_.movePtr(ImmWord(-by * char_size()), current_position_);
    941 
    942  // On RegExp code entry (where this operation is used), the character before
    943  // the current position is expected to be already loaded.
    944  // We have advanced the position, so it's safe to read backwards.
    945  LoadCurrentCharacterUnchecked(-1, 1);
    946  masm_.bind(&after_position);
    947 }
    948 
    949 void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
    950  MOZ_ASSERT(register_index >= num_capture_registers_);
    951  masm_.storePtr(ImmWord(to), register_location(register_index));
    952 }
    953 
    954 // Returns true if a regexp match can be restarted (aka the regexp is global).
    955 // The return value is not used anywhere, but we implement it to be safe.
    956 bool SMRegExpMacroAssembler::Succeed() {
    957  masm_.jump(&success_label_);
    958  return global();
    959 }
    960 
    961 // Capture registers are initialized to input[-1]
    962 void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
    963  MOZ_ASSERT(reg_from <= reg_to);
    964  masm_.loadPtr(inputStart(), temp0_);
    965  masm_.subPtr(Imm32(char_size()), temp0_);
    966  for (int reg = reg_from; reg <= reg_to; reg++) {
    967    masm_.storePtr(temp0_, register_location(reg));
    968  }
    969 }
    970 
    971 void SMRegExpMacroAssembler::Push(Register source) {
    972  MOZ_ASSERT(source != backtrack_stack_pointer_);
    973 
    974  masm_.subPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_);
    975  masm_.store32(source, Address(backtrack_stack_pointer_, 0));
    976 }
    977 
    978 void SMRegExpMacroAssembler::Pop(Register target) {
    979  MOZ_ASSERT(target != backtrack_stack_pointer_);
    980 
    981  masm_.load32SignExtendToPtr(Address(backtrack_stack_pointer_, 0), target);
    982  masm_.addPtr(Imm32(sizeof(int32_t)), backtrack_stack_pointer_);
    983 }
    984 
    985 void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
    986  if (to) {
    987    masm_.jump(to->inner());
    988  } else {
    989    Backtrack();
    990  }
    991 }
    992 
    993 // Generate a quick inline test for backtrack stack overflow.
    994 // If the test fails, call an OOL handler to try growing the stack.
    995 void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
    996  js::jit::Label no_stack_overflow;
    997  masm_.branchPtr(
    998      Assembler::Below,
    999      AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()),
   1000      backtrack_stack_pointer_, &no_stack_overflow);
   1001 
   1002  masm_.call(&stack_overflow_label_);
   1003 
   1004  // Exit with an exception if the call failed
   1005  masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
   1006                     &exit_with_exception_label_);
   1007 
   1008  masm_.bind(&no_stack_overflow);
   1009 }
   1010 
   1011 // This is used to sneak an OOM through the V8 layer.
   1012 static Handle<HeapObject> DummyCode() {
   1013  return Handle<HeapObject>::fromHandleValue(JS::UndefinedHandleValue);
   1014 }
   1015 
   1016 // Finalize code. This is called last, so that we know how many
   1017 // registers we need.
   1018 Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source,
   1019                                                   RegExpFlags flags) {
   1020  if (!cx_->zone()->ensureJitZoneExists(cx_)) {
   1021    return DummyCode();
   1022  }
   1023 
   1024  masm_.bind(&entry_label_);
   1025 
   1026  createStackFrame();
   1027  initFrameAndRegs();
   1028 
   1029  masm_.jump(&start_label_);
   1030 
   1031  successHandler();
   1032  exitHandler();
   1033  backtrackHandler();
   1034  stackOverflowHandler();
   1035 
   1036  Linker linker(masm_);
   1037  JitCode* code = linker.newCode(cx_, js::jit::CodeKind::RegExp);
   1038  if (!code) {
   1039    return DummyCode();
   1040  }
   1041 
   1042  for (LabelPatch& lp : labelPatches_) {
   1043    Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, lp.patchOffset_),
   1044                                       ImmPtr((void*)lp.labelOffset_),
   1045                                       ImmPtr(nullptr));
   1046  }
   1047 
   1048  for (js::jit::CodeOffset& offset : backtrackCodeOffsetPatches_) {
   1049    Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, offset),
   1050                                       ImmPtr(code->raw()), ImmPtr(nullptr));
   1051  }
   1052 
   1053  CollectPerfSpewerJitCodeProfile(code, "RegExp");
   1054 
   1055 #ifdef MOZ_VTUNE
   1056  js::vtune::MarkStub(code, "RegExp");
   1057 #endif
   1058 
   1059  return Handle<HeapObject>(JS::PrivateGCThingValue(code), isolate());
   1060 }
   1061 
   1062 /*
   1063 * The stack will have the following structure:
   1064 *  sp-> - FrameData
   1065 *         - inputStart
   1066 *         - backtrack stack base
   1067 *         - matches
   1068 *         - numMatches
   1069 *       - Registers
   1070 *         - Capture positions
   1071 *         - Scratch registers
   1072 *       --- frame alignment ---
   1073 *       - Saved register area
   1074 *  fp-> - Frame pointer
   1075 *       - Return address
   1076 */
   1077 void SMRegExpMacroAssembler::createStackFrame() {
   1078 #ifdef JS_CODEGEN_ARM64
   1079  // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
   1080  // addressing.  The register we use for PSP may however also be used by
   1081  // calling code, and it is nonvolatile, so save it.  Do this as a special
   1082  // case first because the generic save/restore code needs the PSP to be
   1083  // initialized already.
   1084  MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
   1085  masm_.Str(js::jit::PseudoStackPointer64,
   1086            vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));
   1087 
   1088  // Initialize the PSP from the SP.
   1089  masm_.initPseudoStackPtr();
   1090 #endif
   1091 
   1092  masm_.Push(js::jit::FramePointer);
   1093  masm_.moveStackPtrTo(js::jit::FramePointer);
   1094 
   1095  // Push non-volatile registers which might be modified by jitcode.
   1096  for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more();
   1097       ++iter) {
   1098    masm_.Push(*iter);
   1099  }
   1100 
   1101  // The pointer to InputOutputData is passed as the first argument.
   1102  // On x86 we have to load it off the stack into temp0_.
   1103  // On other platforms it is already in a register.
   1104 #ifdef JS_CODEGEN_X86
   1105  Address ioDataAddr(js::jit::FramePointer, 2 * sizeof(void*));
   1106  masm_.loadPtr(ioDataAddr, temp0_);
   1107 #else
   1108  if (js::jit::IntArgReg0 != temp0_) {
   1109    masm_.movePtr(js::jit::IntArgReg0, temp0_);
   1110  }
   1111 #endif
   1112 
   1113  // Start a new stack frame.
   1114  size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);
   1115  frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
   1116                                              masm_.framePushed(), frameBytes);
   1117  masm_.reserveStack(frameSize_);
   1118  masm_.checkStackAlignment();
   1119 
   1120  // Check if we have space on the stack. Use the *NoInterrupt stack limit to
   1121  // avoid failing repeatedly when the regex code is called from Ion JIT code.
   1122  // (See bug 1208819)
   1123  js::jit::Label stack_ok;
   1124  AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt());
   1125  masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok);
   1126 
   1127  // There is not enough space on the stack. Exit with an exception.
   1128  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
   1129  masm_.jump(&exit_label_);
   1130 
   1131  masm_.bind(&stack_ok);
   1132 }
   1133 
   1134 void SMRegExpMacroAssembler::initFrameAndRegs() {
   1135  // At this point, an uninitialized stack frame has been created,
   1136  // and the address of the InputOutputData is in temp0_.
   1137  Register ioDataReg = temp0_;
   1138 
   1139  Register matchesReg = temp1_;
   1140  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, matches)),
   1141                matchesReg);
   1142 
   1143  // Initialize output registers
   1144  // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
   1145  // because we haven't yet written any data to |backtrack_stack_pointer_|.
   1146  Register extraTemp = backtrack_stack_pointer_;
   1147 
   1148  masm_.loadPtr(Address(matchesReg, MatchPairs::offsetOfPairs()), extraTemp);
   1149  masm_.storePtr(extraTemp, matches());
   1150  masm_.load32(Address(matchesReg, MatchPairs::offsetOfPairCount()), extraTemp);
   1151  masm_.store32(extraTemp, numMatches());
   1152 
   1153 #ifdef DEBUG
   1154  // Bounds-check numMatches. Note that callers that won't look at the captures
   1155  // can always pass numMatches == 1.
   1156  js::jit::Label enoughRegisters;
   1157  masm_.branchPtr(Assembler::Equal, extraTemp, ImmWord(1), &enoughRegisters);
   1158  masm_.branchPtr(Assembler::GreaterThanOrEqual, extraTemp,
   1159                  ImmWord(num_capture_registers_ / 2), &enoughRegisters);
   1160  masm_.assumeUnreachable("Not enough output pairs for RegExp");
   1161  masm_.bind(&enoughRegisters);
   1162 #endif
   1163 
   1164  // Load input start pointer.
   1165  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputStart)),
   1166                current_position_);
   1167 
   1168  // Load input end pointer
   1169  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputEnd)),
   1170                input_end_pointer_);
   1171 
   1172  // Set up input position to be negative offset from string end.
   1173  masm_.subPtr(input_end_pointer_, current_position_);
   1174 
   1175  // Store inputStart
   1176  masm_.storePtr(current_position_, inputStart());
   1177 
   1178  // Load start index
   1179  Register startIndexReg = temp1_;
   1180  masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, startIndex)),
   1181                startIndexReg);
   1182  masm_.computeEffectiveAddress(
   1183      BaseIndex(current_position_, startIndexReg, factor()), current_position_);
   1184 
   1185  // Initialize current_character_.
   1186  // Load newline if index is at start, or previous character otherwise.
   1187  js::jit::Label start_regexp;
   1188  js::jit::Label load_previous_character;
   1189  masm_.branchPtr(Assembler::NotEqual, startIndexReg, ImmWord(0),
   1190                  &load_previous_character);
   1191  masm_.movePtr(ImmWord('\n'), current_character_);
   1192  masm_.jump(&start_regexp);
   1193 
   1194  masm_.bind(&load_previous_character);
   1195  LoadCurrentCharacterUnchecked(-1, 1);
   1196  masm_.bind(&start_regexp);
   1197 
   1198  // Initialize captured registers with inputStart - 1
   1199  MOZ_ASSERT(num_capture_registers_ > 0);
   1200  Register inputStartMinusOneReg = temp0_;
   1201  masm_.loadPtr(inputStart(), inputStartMinusOneReg);
   1202  masm_.subPtr(Imm32(char_size()), inputStartMinusOneReg);
   1203  if (num_capture_registers_ > 8) {
   1204    masm_.movePtr(ImmWord(register_offset(0)), temp1_);
   1205    js::jit::Label init_loop;
   1206    masm_.bind(&init_loop);
   1207    masm_.storePtr(inputStartMinusOneReg, BaseIndex(masm_.getStackPointer(),
   1208                                                    temp1_, js::jit::TimesOne));
   1209    masm_.addPtr(ImmWord(sizeof(void*)), temp1_);
   1210    masm_.branchPtr(Assembler::LessThanOrEqual, temp1_,
   1211                    ImmWord(register_offset(num_capture_registers_ - 1)),
   1212                    &init_loop);
   1213  } else {
   1214    // Unroll the loop
   1215    for (int i = 0; i < num_capture_registers_; i++) {
   1216      masm_.storePtr(inputStartMinusOneReg, register_location(i));
   1217    }
   1218  }
   1219 
   1220  // Initialize backtrack stack pointer
   1221  masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
   1222                backtrack_stack_pointer_);
   1223  masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
   1224 }
   1225 
   1226 // Called when we find a match. May not be generated if we can
   1227 // determine ahead of time that a regexp cannot match: for example,
   1228 // when compiling /\u1e9e/ for latin-1 inputs.
   1229 void SMRegExpMacroAssembler::successHandler() {
   1230  if (!success_label_.used()) {
   1231    return;
   1232  }
   1233  masm_.bind(&success_label_);
   1234 
   1235  // Copy captures to the MatchPairs pointed to by the InputOutputData.
   1236  // Captures are stored as positions, which are negative byte offsets
   1237  // from the end of the string.  We must convert them to actual
   1238  // indices.
   1239  //
   1240  // Index:        [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
   1241  // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ] // IS = -6
   1242  // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ] // IS = -12
   1243  //
   1244  // To convert a position to an index, we subtract InputStart, and
   1245  // divide the result by char_size.
   1246  Register matchesReg = temp1_;
   1247  masm_.loadPtr(matches(), matchesReg);
   1248 
   1249  // Use |backtrack_stack_pointer_| as an additional temp register. This is safe
   1250  // because we don't read from |backtrack_stack_pointer_| after this point.
   1251  Register extraTemp = backtrack_stack_pointer_;
   1252 
   1253  Register inputStartReg = extraTemp;
   1254  masm_.loadPtr(inputStart(), inputStartReg);
   1255 
   1256  auto copyRegister = [&](int reg) {
   1257    masm_.loadPtr(register_location(reg), temp0_);
   1258    masm_.subPtr(inputStartReg, temp0_);
   1259    if (mode_ == UC16) {
   1260      masm_.rshiftPtrArithmetic(Imm32(1), temp0_);
   1261    }
   1262    masm_.store32(temp0_, Address(matchesReg, reg * sizeof(int32_t)));
   1263  };
   1264 
   1265  // Copy first match pair.
   1266  MOZ_ASSERT(num_capture_registers_ >= 2);
   1267  copyRegister(0);
   1268  copyRegister(1);
   1269 
   1270  if (num_capture_registers_ > 2) {
   1271    // We always need the first match pair to update the `lastIndex` slot,
   1272    // but we can skip copying the capture groups if we won't look at them.
   1273    // This also allows our caller to avoid allocating space for unused results.
   1274    js::jit::Label earlyExitForTest;
   1275    masm_.branch32(Assembler::Equal, numMatches(), Imm32(1), &earlyExitForTest);
   1276 
   1277    for (int i = 2; i < num_capture_registers_; i++) {
   1278      copyRegister(i);
   1279    }
   1280 
   1281    masm_.bind(&earlyExitForTest);
   1282  }
   1283 
   1284  masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Success)), temp0_);
   1285  // This falls through to the exit handler.
   1286 }
   1287 
   1288 void SMRegExpMacroAssembler::exitHandler() {
   1289  masm_.bind(&exit_label_);
   1290 
   1291  if (temp0_ != js::jit::ReturnReg) {
   1292    masm_.movePtr(temp0_, js::jit::ReturnReg);
   1293  }
   1294 
   1295  masm_.freeStack(frameSize_);
   1296 
   1297  // Restore registers which were saved on entry
   1298  for (GeneralRegisterBackwardIterator iter(savedRegisters_); iter.more();
   1299       ++iter) {
   1300    masm_.Pop(*iter);
   1301  }
   1302 
   1303  masm_.Pop(js::jit::FramePointer);
   1304 
   1305 #ifdef JS_CODEGEN_ARM64
   1306  // Now restore the value that was in the PSP register on entry, and return.
   1307 
   1308  // Obtain the correct SP from the PSP.
   1309  masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);
   1310 
   1311  // Restore the saved value of the PSP register, this value is whatever the
   1312  // caller had saved in it, not any actual SP value, and it must not be
   1313  // overwritten subsequently.
   1314  masm_.Ldr(js::jit::PseudoStackPointer64,
   1315            vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));
   1316 
   1317  // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
   1318  masm_.Ret(vixl::lr);
   1319 #else
   1320  masm_.abiret();
   1321 #endif
   1322 
   1323  if (exit_with_exception_label_.used()) {
   1324    masm_.bind(&exit_with_exception_label_);
   1325 
   1326    // Exit with an error result to signal thrown exception
   1327    masm_.movePtr(ImmWord(int32_t(js::RegExpRunStatus::Error)), temp0_);
   1328    masm_.jump(&exit_label_);
   1329  }
   1330 }
   1331 
   1332 void SMRegExpMacroAssembler::backtrackHandler() {
   1333  if (!backtrack_label_.used()) {
   1334    return;
   1335  }
   1336  masm_.bind(&backtrack_label_);
   1337  Backtrack();
   1338 }
   1339 
   1340 void SMRegExpMacroAssembler::stackOverflowHandler() {
   1341  if (!stack_overflow_label_.used()) {
   1342    return;
   1343  }
   1344 
   1345  js::jit::AutoCreatedBy acb(masm_,
   1346                             "SMRegExpMacroAssembler::stackOverflowHandler");
   1347 
   1348  // Called if the backtrack-stack limit has been hit.
   1349  masm_.bind(&stack_overflow_label_);
   1350 
   1351  // Load argument
   1352  masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);
   1353 
   1354  // Save registers before calling C function
   1355  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
   1356 
   1357 #ifdef JS_USE_LINK_REGISTER
   1358  masm_.pushReturnAddress();
   1359 #endif
   1360 
   1361  // Adjust for the return address on the stack.
   1362  size_t frameOffset = sizeof(void*);
   1363 
   1364  volatileRegs.takeUnchecked(temp0_);
   1365  volatileRegs.takeUnchecked(temp1_);
   1366  masm_.PushRegsInMask(volatileRegs);
   1367 
   1368  using Fn = bool (*)(RegExpStack* regexp_stack);
   1369  masm_.setupUnalignedABICall(temp0_);
   1370  masm_.passABIArg(temp1_);
   1371  masm_.callWithABI<Fn, ::js::irregexp::GrowBacktrackStack>();
   1372  masm_.storeCallBoolResult(temp0_);
   1373 
   1374  masm_.PopRegsInMask(volatileRegs);
   1375 
   1376  // If GrowBacktrackStack returned false, we have failed to grow the
   1377  // stack, and must exit with a stack-overflow exception. Do this in
   1378  // the caller so that the stack is adjusted by our return instruction.
   1379  js::jit::Label overflow_return;
   1380  masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);
   1381 
   1382  // Otherwise, store the new backtrack stack base and recompute the new
   1383  // top of the stack.
   1384  Address bsbAddress(masm_.getStackPointer(),
   1385                     offsetof(FrameData, backtrackStackBase) + frameOffset);
   1386  masm_.subPtr(bsbAddress, backtrack_stack_pointer_);
   1387 
   1388  masm_.loadPtr(AbsoluteAddress(ExternalReference::TopOfRegexpStack(isolate())),
   1389                temp1_);
   1390  masm_.storePtr(temp1_, bsbAddress);
   1391  masm_.addPtr(temp1_, backtrack_stack_pointer_);
   1392 
   1393  // Resume execution in calling code.
   1394  masm_.bind(&overflow_return);
   1395  masm_.ret();
   1396 }
   1397 
   1398 // This is only used by tracing code.
   1399 // The return value doesn't matter.
   1400 RegExpMacroAssembler::IrregexpImplementation
   1401 SMRegExpMacroAssembler::Implementation() {
   1402  return kBytecodeImplementation;
   1403 }
   1404 
   1405 // Compare two strings in `/i` mode (ignoreCase, but not unicode).
   1406 /*static */
   1407 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
   1408    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
   1409  js::AutoUnsafeCallWithABI unsafe;
   1410 
   1411  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
   1412  size_t length = byteLength / sizeof(char16_t);
   1413 
   1414  for (size_t i = 0; i < length; i++) {
   1415    char16_t c1 = substring1[i];
   1416    char16_t c2 = substring2[i];
   1417    if (c1 != c2) {
   1418 #ifdef JS_HAS_INTL_API
   1419      // Non-unicode regexps have weird case-folding rules.
   1420      c1 = RegExpCaseFolding::Canonicalize(c1);
   1421      c2 = RegExpCaseFolding::Canonicalize(c2);
   1422 #else
   1423      // If we aren't building with ICU, fall back to `/iu` mode. The only
   1424      // differences are in corner cases.
   1425      c1 = js::unicode::FoldCase(c1);
   1426      c2 = js::unicode::FoldCase(c2);
   1427 #endif
   1428      if (c1 != c2) {
   1429        return 0;
   1430      }
   1431    }
   1432  }
   1433 
   1434  return 1;
   1435 }
   1436 
   1437 // Compare two strings in `/iu` mode (ignoreCase and unicode).
   1438 /*static */
   1439 uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUnicode(
   1440    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
   1441  js::AutoUnsafeCallWithABI unsafe;
   1442 
   1443  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
   1444  size_t length = byteLength / sizeof(char16_t);
   1445 
   1446  for (size_t i = 0; i < length; i++) {
   1447    char16_t c1 = substring1[i];
   1448    char16_t c2 = substring2[i];
   1449    if (c1 != c2) {
   1450      // Unicode regexps use the common and simple case-folding
   1451      // mappings of the Unicode Character Database.
   1452      c1 = js::unicode::FoldCase(c1);
   1453      c2 = js::unicode::FoldCase(c2);
   1454      if (c1 != c2) {
   1455        return 0;
   1456      }
   1457    }
   1458  }
   1459 
   1460  return 1;
   1461 }
   1462 
   1463 /* static */
   1464 bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) {
   1465  js::AutoUnsafeCallWithABI unsafe;
   1466  size_t size = regexp_stack->memory_size();
   1467  return !!regexp_stack->EnsureCapacity(size * 2);
   1468 }
   1469 
   1470 bool SMRegExpMacroAssembler::CanReadUnaligned() const {
   1471 #if defined(JS_CODEGEN_ARM)
   1472  return !js::jit::ARMFlags::HasAlignmentFault();
   1473 #elif defined(JS_CODEGEN_MIPS64)
   1474  return false;
   1475 #else
   1476  return true;
   1477 #endif
   1478 }
   1479 
   1480 }  // namespace internal
   1481 }  // namespace v8