tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RegExpNativeMacroAssembler.h (13409B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // Copyright 2020 the V8 project authors. All rights reserved.
      8 // Use of this source code is governed by a BSD-style license that can be
      9 // found in the LICENSE file.
     10 
     11 // This file implements the NativeRegExpMacroAssembler interface for
     12 // SpiderMonkey. It provides the same interface as each of V8's
     13 // architecture-specific implementations.
     14 
     15 #ifndef RegexpMacroAssemblerArch_h
     16 #define RegexpMacroAssemblerArch_h
     17 
     18 #include "irregexp/imported/regexp-macro-assembler.h"
     19 #include "jit/MacroAssembler.h"
     20 
     21 namespace v8 {
     22 namespace internal {
     23 
     24 struct FrameData {
     25  // Character position at the start of the input, stored as a
     26  // negative offset from the end of the string (input_end_pointer_).
     27  size_t inputStart;
     28 
     29  // The backtrack_stack_pointer_ register points to the top of the stack.
     30  // This points to the bottom of the backtrack stack.
     31  void* backtrackStackBase;
     32 
     33  // Copy of the input MatchPairs.
     34  int32_t* matches;    // pointer to capture array
     35  int32_t numMatches;  // size of capture array
     36 };
     37 
     38 class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
     39 public:
     40  SMRegExpMacroAssembler(JSContext* cx, js::jit::StackMacroAssembler& masm,
     41                         Zone* zone, Mode mode, uint32_t num_capture_registers);
     42  virtual ~SMRegExpMacroAssembler() = default;
     43 
     44  virtual int stack_limit_slack_slot_count();
     45  virtual IrregexpImplementation Implementation();
     46 
     47  virtual bool Succeed();
     48  virtual void Fail();
     49 
     50  virtual void AdvanceCurrentPosition(int by);
     51  virtual void PopCurrentPosition();
     52  virtual void PushCurrentPosition();
     53  virtual void SetCurrentPositionFromEnd(int by);
     54 
     55  virtual void Backtrack();
     56  virtual void Bind(Label* label);
     57  virtual void GoTo(Label* label);
     58  virtual void PushBacktrack(Label* label);
     59 
     60  virtual void CheckCharacter(uint32_t c, Label* on_equal);
     61  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
     62  virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater);
     63  virtual void CheckCharacterLT(base::uc16 limit, Label* on_less);
     64  virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
     65                                      Label* on_equal);
     66  virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
     67                                         Label* on_not_equal);
     68  virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus,
     69                                              base::uc16 mask,
     70                                              Label* on_not_equal);
     71  virtual void CheckFixedLengthLoop(Label* on_tos_equals_current_position);
     72  virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to,
     73                                     Label* on_in_range);
     74  virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
     75                                        Label* on_not_in_range);
     76  virtual bool CheckCharacterInRangeArray(
     77      const ZoneList<CharacterRange>* ranges, Label* on_in_range);
     78  virtual bool CheckCharacterNotInRangeArray(
     79      const ZoneList<CharacterRange>* ranges, Label* on_not_in_range);
     80  virtual void CheckAtStart(int cp_offset, Label* on_at_start);
     81  virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
     82  virtual void CheckPosition(int cp_offset, Label* on_outside_input);
     83  virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
     84  virtual void SkipUntilBitInTable(int cp_offset, Handle<ByteArray> table,
     85                                   Handle<ByteArray> nibble_table,
     86                                   int advance_by);
     87  virtual bool SkipUntilBitInTableUseSimd(int advance_by);
     88  virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
     89                                          Label* on_no_match);
     90  virtual void CheckNotBackReference(int start_reg, bool read_backward,
     91                                     Label* on_no_match);
     92  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
     93                                               bool read_backward, bool unicode,
     94                                               Label* on_no_match);
     95 
     96  virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
     97                                        bool check_bounds, int characters,
     98                                        int eats_at_least);
     99 
    100  virtual void AdvanceRegister(int reg, int by);
    101  virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
    102  virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
    103  virtual void IfRegisterEqPos(int reg, Label* if_eq);
    104  virtual void PopRegister(int register_index);
    105  virtual void PushRegister(int register_index,
    106                            StackCheckFlag check_stack_limit);
    107  virtual void ReadCurrentPositionFromRegister(int reg);
    108  virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
    109  virtual void ReadStackPointerFromRegister(int reg);
    110  virtual void WriteStackPointerToRegister(int reg);
    111  virtual void SetRegister(int register_index, int to);
    112  virtual void ClearRegisters(int reg_from, int reg_to);
    113 
    114  virtual Handle<HeapObject> GetCode(Handle<String> source, RegExpFlags flags);
    115 
    116  virtual bool CanReadUnaligned() const;
    117 
    118 private:
    119  size_t frameSize_ = 0;
    120 
    121  void createStackFrame();
    122  void initFrameAndRegs();
    123  void successHandler();
    124  void exitHandler();
    125  void backtrackHandler();
    126  void stackOverflowHandler();
    127 
    128  // Push a register on the backtrack stack.
    129  void Push(js::jit::Register value);
    130 
    131  // Pop a value from the backtrack stack.
    132  void Pop(js::jit::Register target);
    133 
    134  void CheckAtStartImpl(int cp_offset, Label* on_cond,
    135                        js::jit::Assembler::Condition cond);
    136  void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
    137                          js::jit::Assembler::Condition cond);
    138  void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
    139                                  bool negate);
    140  void CheckCharacterInRangeImpl(base::uc16 from, base::uc16 to, Label* on_cond,
    141                                 js::jit::Assembler::Condition cond);
    142  void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
    143                                 bool unicode, Label* on_no_match,
    144                                 bool ignore_case);
    145  void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
    146 
    147  void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
    148 
    149  void JumpOrBacktrack(Label* to);
    150 
    151  // MacroAssembler methods that take a Label can be called with a
    152  // null label, which means that we should backtrack if we would jump
    153  // to that label. This is a helper to avoid writing out the same
    154  // logic a dozen times.
    155  inline js::jit::Label* LabelOrBacktrack(Label* to) {
    156    return to ? to->inner() : &backtrack_label_;
    157  }
    158 
    159  void CheckBacktrackStackLimit();
    160 
    161 public:
    162  static bool GrowBacktrackStack(RegExpStack* regexp_stack);
    163 
    164  static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1,
    165                                                   const char16_t* substring2,
    166                                                   size_t byteLength);
    167  static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
    168                                                const char16_t* substring2,
    169                                                size_t byteLength);
    170  static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges);
    171 
    172 private:
    173  inline int char_size() { return static_cast<int>(mode_); }
    174  inline js::jit::Scale factor() {
    175    return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
    176  }
    177 
    178  js::jit::Address inputStart() {
    179    return js::jit::Address(masm_.getStackPointer(),
    180                            offsetof(FrameData, inputStart));
    181  }
    182  js::jit::Address backtrackStackBase() {
    183    return js::jit::Address(masm_.getStackPointer(),
    184                            offsetof(FrameData, backtrackStackBase));
    185  }
    186  js::jit::Address matches() {
    187    return js::jit::Address(masm_.getStackPointer(),
    188                            offsetof(FrameData, matches));
    189  }
    190  js::jit::Address numMatches() {
    191    return js::jit::Address(masm_.getStackPointer(),
    192                            offsetof(FrameData, numMatches));
    193  }
    194 
    195  // The stack-pointer-relative location of a regexp register.
    196  js::jit::Address register_location(int register_index) {
    197    return js::jit::Address(masm_.getStackPointer(),
    198                            register_offset(register_index));
    199  }
    200 
    201  int32_t register_offset(int register_index) {
    202    MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
    203    if (num_registers_ <= register_index) {
    204      num_registers_ = register_index + 1;
    205    }
    206    static_assert(alignof(uintptr_t) <= alignof(FrameData));
    207    return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
    208  }
    209 
    210  JSContext* cx_;
    211  js::jit::StackMacroAssembler& masm_;
    212 
    213  /*
    214   * This assembler uses the following registers:
    215   *
    216   * - current_character_:
    217   *     Contains the character (or characters) currently being examined.
    218   *     Must be loaded using LoadCurrentCharacter before using any of the
    219   *     dispatch methods. After a matching pass for a global regexp,
    220   *     temporarily stores the index of capture start.
    221   * - current_position_:
    222   *     Current position in input *as negative byte offset from end of string*.
    223   * - input_end_pointer_:
    224   *     Points to byte after last character in the input. current_position_ is
    225   *     relative to this.
    226   * - backtrack_stack_pointer_:
    227   *     Points to tip of the (heap-allocated) backtrack stack. The stack grows
    228   *     downward (like the native stack).
    229   * - temp0_, temp1_, temp2_:
    230   *     Scratch registers.
    231   *
    232   * The native stack pointer is used to access arguments (InputOutputData),
    233   * local variables (FrameData), and irregexp's internal virtual registers
    234   * (see register_location).
    235   */
    236 
    237  js::jit::Register current_character_;
    238  js::jit::Register current_position_;
    239  js::jit::Register input_end_pointer_;
    240  js::jit::Register backtrack_stack_pointer_;
    241  js::jit::Register temp0_, temp1_, temp2_;
    242 
    243  // These labels are used in various API calls and bound (if used) in
    244  // GetCode. If we abort in the middle of a compilation, as may
    245  // happen if a regexp is too big, they may be used but not
    246  // bound.
    247  js::jit::NonAssertingLabel entry_label_;
    248  js::jit::NonAssertingLabel start_label_;
    249  js::jit::NonAssertingLabel backtrack_label_;
    250  js::jit::NonAssertingLabel success_label_;
    251  js::jit::NonAssertingLabel exit_label_;
    252  js::jit::NonAssertingLabel stack_overflow_label_;
    253  js::jit::NonAssertingLabel exit_with_exception_label_;
    254 
    255  // When we generate the code to push a backtrack label's address
    256  // onto the backtrack stack, we don't know its final address. We
    257  // have to patch it after linking. This is slightly delicate, as the
    258  // Label itself (which is allocated on the stack) may not exist by
    259  // the time we link. The approach is as follows:
    260  //
    261  // 1. When we push a label on the backtrack stack (PushBacktrack),
    262  //    we bind the label's patchOffset_ field to the offset within
    263  //    the code that should be overwritten. This works because each
    264  //    label is only pushed by a single instruction.
    265  //
    266  // 2. When we bind a label (Bind), we check to see if it has a
    267  //    bound patchOffset_. If it does, we create a LabelPatch mapping
    268  //    its patch offset to the offset of the label itself.
    269  //
    270  // 3. While linking the code, we walk the list of label patches
    271  //    and patch the code accordingly.
    272  //
    273  // 4. Finally, we patch in the code base address that is added to the
    274  //    label offset to calculate the actual address to jump to.
    275  class LabelPatch {
    276   public:
    277    LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
    278        : patchOffset_(patchOffset), labelOffset_(labelOffset) {}
    279 
    280    js::jit::CodeOffset patchOffset_;
    281    size_t labelOffset_ = 0;
    282  };
    283 
    284  js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
    285  void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
    286    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    287    if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
    288      oomUnsafe.crash("Irregexp label patch");
    289    }
    290  }
    291 
    292  js::Vector<js::jit::CodeOffset, 4, js::SystemAllocPolicy>
    293      backtrackCodeOffsetPatches_;
    294  void PushBacktrackCodeOffsetPatch(js::jit::CodeOffset offset) {
    295    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    296    if (!backtrackCodeOffsetPatches_.append(offset)) {
    297      oomUnsafe.crash("Irregexp backtrack code offset patch");
    298    }
    299  }
    300 
    301  Mode mode_;
    302  int num_registers_;
    303  int num_capture_registers_;
    304  js::jit::LiveGeneralRegisterSet savedRegisters_;
    305 
    306 public:
    307  using TableVector =
    308      js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
    309  TableVector& tables() { return tables_; }
    310 
    311 private:
    312  TableVector tables_;
    313  void AddTable(PseudoHandle<ByteArrayData> table) {
    314    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    315    if (!tables_.append(std::move(table))) {
    316      oomUnsafe.crash("Irregexp table append");
    317    }
    318  }
    319 };
    320 
    321 }  // namespace internal
    322 }  // namespace v8
    323 
    324 #endif  // RegexpMacroAssemblerArch_h