tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regexp-nodes.h (34020B)


      1 // Copyright 2019 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_REGEXP_REGEXP_NODES_H_
      6 #define V8_REGEXP_REGEXP_NODES_H_
      7 
      8 #include "irregexp/imported/regexp-macro-assembler.h"
      9 
     10 namespace v8 {
     11 namespace internal {
     12 
     13 class AlternativeGenerationList;
     14 class BoyerMooreLookahead;
     15 class FixedLengthLoopState;
     16 class NodeVisitor;
     17 class QuickCheckDetails;
     18 class RegExpCompiler;
     19 class SeqRegExpNode;
     20 class Trace;
     21 struct PreloadState;
     22 
     23 #define FOR_EACH_NODE_TYPE(VISIT) \
     24  VISIT(End)                      \
     25  VISIT(Action)                   \
     26  VISIT(Choice)                   \
     27  VISIT(LoopChoice)               \
     28  VISIT(NegativeLookaroundChoice) \
     29  VISIT(BackReference)            \
     30  VISIT(Assertion)                \
     31  VISIT(Text)
     32 
     33 #define FORWARD_DECLARE(type) class type##Node;
     34 FOR_EACH_NODE_TYPE(FORWARD_DECLARE)
     35 #undef FORWARD_DECLARE
     36 
     37 struct NodeInfo final {
     38  NodeInfo()
     39      : being_analyzed(false),
     40        been_analyzed(false),
     41        follows_word_interest(false),
     42        follows_newline_interest(false),
     43        follows_start_interest(false),
     44        at_end(false),
     45        visited(false),
     46        replacement_calculated(false) {}
     47 
     48  // Returns true if the interests and assumptions of this node
     49  // matches the given one.
     50  bool Matches(NodeInfo* that) {
     51    return (at_end == that->at_end) &&
     52           (follows_word_interest == that->follows_word_interest) &&
     53           (follows_newline_interest == that->follows_newline_interest) &&
     54           (follows_start_interest == that->follows_start_interest);
     55  }
     56 
     57  // Updates the interests of this node given the interests of the
     58  // node preceding it.
     59  void AddFromPreceding(NodeInfo* that) {
     60    at_end |= that->at_end;
     61    follows_word_interest |= that->follows_word_interest;
     62    follows_newline_interest |= that->follows_newline_interest;
     63    follows_start_interest |= that->follows_start_interest;
     64  }
     65 
     66  bool HasLookbehind() {
     67    return follows_word_interest || follows_newline_interest ||
     68           follows_start_interest;
     69  }
     70 
     71  // Sets the interests of this node to include the interests of the
     72  // following node.
     73  void AddFromFollowing(NodeInfo* that) {
     74    follows_word_interest |= that->follows_word_interest;
     75    follows_newline_interest |= that->follows_newline_interest;
     76    follows_start_interest |= that->follows_start_interest;
     77  }
     78 
     79  void ResetCompilationState() {
     80    being_analyzed = false;
     81    been_analyzed = false;
     82  }
     83 
     84  bool being_analyzed : 1;
     85  bool been_analyzed : 1;
     86 
     87  // These bits are set of this node has to know what the preceding
     88  // character was.
     89  bool follows_word_interest : 1;
     90  bool follows_newline_interest : 1;
     91  bool follows_start_interest : 1;
     92 
     93  bool at_end : 1;
     94  bool visited : 1;
     95  bool replacement_calculated : 1;
     96 };
     97 
     98 struct EatsAtLeastInfo final {
     99  EatsAtLeastInfo() : EatsAtLeastInfo(0) {}
    100  explicit EatsAtLeastInfo(uint8_t eats)
    101      : eats_at_least_from_possibly_start(eats),
    102        eats_at_least_from_not_start(eats) {}
    103  void SetMin(const EatsAtLeastInfo& other) {
    104    if (other.eats_at_least_from_possibly_start <
    105        eats_at_least_from_possibly_start) {
    106      eats_at_least_from_possibly_start =
    107          other.eats_at_least_from_possibly_start;
    108    }
    109    if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) {
    110      eats_at_least_from_not_start = other.eats_at_least_from_not_start;
    111    }
    112  }
    113 
    114  bool IsZero() const {
    115    return eats_at_least_from_possibly_start == 0 &&
    116           eats_at_least_from_not_start == 0;
    117  }
    118 
    119  // Any successful match starting from the current node will consume at least
    120  // this many characters. This does not necessarily mean that there is a
    121  // possible match with exactly this many characters, but we generally try to
    122  // get this number as high as possible to allow for early exit on failure.
    123  uint8_t eats_at_least_from_possibly_start;
    124 
    125  // Like eats_at_least_from_possibly_start, but with the additional assumption
    126  // that start-of-string assertions (^) can't match. This value is greater than
    127  // or equal to eats_at_least_from_possibly_start.
    128  uint8_t eats_at_least_from_not_start;
    129 };
    130 
    131 class RegExpNode : public ZoneObject {
    132 public:
    133  explicit RegExpNode(Zone* zone)
    134      : replacement_(nullptr),
    135        on_work_list_(false),
    136        trace_count_(0),
    137        zone_(zone) {
    138    bm_info_[0] = bm_info_[1] = nullptr;
    139  }
    140  virtual ~RegExpNode();
    141  virtual void Accept(NodeVisitor* visitor) = 0;
    142  // Generates a goto to this node or actually generates the code at this point.
    143  virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
    144  // How many characters must this node consume at a minimum in order to
    145  // succeed.  The not_at_start argument is used to indicate that we know we are
    146  // not at the start of the input.  In this case anchored branches will always
    147  // fail and can be ignored when determining how many characters are consumed
    148  // on success.  If this node has not been analyzed yet, EatsAtLeast returns 0.
    149  uint32_t EatsAtLeast(bool not_at_start);
    150  // Returns how many characters this node must consume in order to succeed,
    151  // given that this is a LoopChoiceNode whose counter register is in a
    152  // newly-initialized state at the current position in the generated code. For
    153  // example, consider /a{6,8}/. Absent any extra information, the
    154  // LoopChoiceNode for the repetition must report that it consumes at least
    155  // zero characters, because it may have already looped several times. However,
    156  // with a newly-initialized counter, it can report that it consumes at least
    157  // six characters.
    158  virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry();
    159  // Emits some quick code that checks whether the preloaded characters match.
    160  // Falls through on certain failure, jumps to the label on possible success.
    161  // If the node cannot make a quick check it does nothing and returns false.
    162  bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace,
    163                      Trace* trace, bool preload_has_checked_bounds,
    164                      Label* on_possible_success,
    165                      QuickCheckDetails* details_return,
    166                      bool fall_through_on_failure, ChoiceNode* predecessor);
    167  // For a given number of characters this returns a mask and a value.  The
    168  // next n characters are anded with the mask and compared with the value.
    169  // A comparison failure indicates the node cannot match the next n characters.
    170  // A comparison success indicates the node may match.
    171  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
    172                                    RegExpCompiler* compiler,
    173                                    int characters_filled_in,
    174                                    bool not_at_start) = 0;
    175  // Fills in quick check details for this node, given that this is a
    176  // LoopChoiceNode whose counter register is in a newly-initialized state at
    177  // the current position in the generated code. For example, consider /a{6,8}/.
    178  // Absent any extra information, the LoopChoiceNode for the repetition cannot
    179  // generate any useful quick check because a match might be the (empty)
    180  // continuation node. However, with a newly-initialized counter, it can
    181  // generate a quick check for several 'a' characters at once.
    182  virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
    183                                                 RegExpCompiler* compiler,
    184                                                 int characters_filled_in,
    185                                                 bool not_at_start);
    186  static const int kNodeIsTooComplexForFixedLengthLoops = kMinInt;
    187  virtual int FixedLengthLoopLength() {
    188    return kNodeIsTooComplexForFixedLengthLoops;
    189  }
    190  // Only returns the successor for a text node of length 1 that matches any
    191  // character and that has no guards on it.
    192  virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
    193      RegExpCompiler* compiler) {
    194    return nullptr;
    195  }
    196 
    197  // Collects information on the possible code units (mod 128) that can match if
    198  // we look forward.  This is used for a Boyer-Moore-like string searching
    199  // implementation.  TODO(erikcorry):  This should share more code with
    200  // EatsAtLeast, GetQuickCheckDetails.  The budget argument is used to limit
    201  // the number of nodes we are willing to look at in order to create this data.
    202  static const int kRecursionBudget = 200;
    203  bool KeepRecursing(RegExpCompiler* compiler);
    204  virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
    205                            BoyerMooreLookahead* bm, bool not_at_start) {
    206    UNREACHABLE();
    207  }
    208 
    209  // If we know that the input is one-byte then there are some nodes that can
    210  // never match.  This method returns a node that can be substituted for
    211  // itself, or nullptr if the node can never match.
    212  virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) {
    213    return this;
    214  }
    215  // Helper for FilterOneByte.
    216  RegExpNode* replacement() {
    217    DCHECK(info()->replacement_calculated);
    218    return replacement_;
    219  }
    220  RegExpNode* set_replacement(RegExpNode* replacement) {
    221    info()->replacement_calculated = true;
    222    replacement_ = replacement;
    223    return replacement;  // For convenience.
    224  }
    225 
    226  // We want to avoid recalculating the lookahead info, so we store it on the
    227  // node.  Only info that is for this node is stored.  We can tell that the
    228  // info is for this node when offset == 0, so the information is calculated
    229  // relative to this node.
    230  void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
    231    if (offset == 0) set_bm_info(not_at_start, bm);
    232  }
    233 
    234  Label* label() { return &label_; }
    235  // If non-generic code is generated for a node (i.e. the node is not at the
    236  // start of the trace) then it cannot be reused.  This variable sets a limit
    237  // on how often we allow that to happen before we insist on starting a new
    238  // trace and generating generic code for a node that can be reused by flushing
    239  // the deferred actions in the current trace and generating a goto.
    240  static const int kMaxCopiesCodeGenerated = 10;
    241 
    242  bool on_work_list() { return on_work_list_; }
    243  void set_on_work_list(bool value) { on_work_list_ = value; }
    244 
    245  NodeInfo* info() { return &info_; }
    246  const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; }
    247  void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) {
    248    eats_at_least_ = eats_at_least;
    249  }
    250 
    251  // TODO(v8:10441): This is a hacky way to avoid exponential code size growth
    252  // for very large choice nodes that can be generated by unicode property
    253  // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to
    254  // have generated the maximum count of code copies already.
    255  // We should instead fix this properly, e.g. by using the code size budget
    256  // (flush_budget) or by generating property escape matches as calls to a C
    257  // function.
    258  void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; }
    259 
    260  BoyerMooreLookahead* bm_info(bool not_at_start) {
    261    return bm_info_[not_at_start ? 1 : 0];
    262  }
    263 
    264 #define DECLARE_CAST(type) \
    265  virtual type##Node* As##type##Node() { return nullptr; }
    266  FOR_EACH_NODE_TYPE(DECLARE_CAST)
    267 #undef DECLARE_CAST
    268 
    269  virtual SeqRegExpNode* AsSeqRegExpNode() { return nullptr; }
    270 
    271  Zone* zone() const { return zone_; }
    272 
    273 protected:
    274  enum LimitResult { DONE, CONTINUE };
    275  RegExpNode* replacement_;
    276 
    277  LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
    278 
    279  void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
    280    bm_info_[not_at_start ? 1 : 0] = bm;
    281  }
    282 
    283 private:
    284  static const int kFirstCharBudget = 10;
    285  Label label_;
    286  bool on_work_list_;
    287  NodeInfo info_;
    288 
    289  // Saved values for EatsAtLeast results, to avoid recomputation. Filled in
    290  // during analysis (valid if info_.been_analyzed is true).
    291  EatsAtLeastInfo eats_at_least_;
    292 
    293  // This variable keeps track of how many times code has been generated for
    294  // this node (in different traces).  We don't keep track of where the
    295  // generated code is located unless the code is generated at the start of
    296  // a trace, in which case it is generic and can be reused by flushing the
    297  // deferred operations in the current trace and generating a goto.
    298  int trace_count_;
    299  BoyerMooreLookahead* bm_info_[2];
    300 
    301  Zone* zone_;
    302 };
    303 
    304 class SeqRegExpNode : public RegExpNode {
    305 public:
    306  explicit SeqRegExpNode(RegExpNode* on_success)
    307      : RegExpNode(on_success->zone()), on_success_(on_success) {}
    308  RegExpNode* on_success() { return on_success_; }
    309  void set_on_success(RegExpNode* node) { on_success_ = node; }
    310  RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
    311  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    312                    BoyerMooreLookahead* bm, bool not_at_start) override {
    313    on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
    314    if (offset == 0) set_bm_info(not_at_start, bm);
    315  }
    316  SeqRegExpNode* AsSeqRegExpNode() override { return this; }
    317 
    318 protected:
    319  RegExpNode* FilterSuccessor(int depth, RegExpCompiler* compiler);
    320 
    321 private:
    322  RegExpNode* on_success_;
    323 };
    324 
    325 class ActionNode : public SeqRegExpNode {
    326 public:
    327  enum ActionType {
    328    SET_REGISTER_FOR_LOOP,
    329    INCREMENT_REGISTER,
    330    CLEAR_POSITION,
    331    RESTORE_POSITION,
    332    BEGIN_POSITIVE_SUBMATCH,
    333    BEGIN_NEGATIVE_SUBMATCH,
    334    POSITIVE_SUBMATCH_SUCCESS,
    335    EMPTY_MATCH_CHECK,
    336    CLEAR_CAPTURES,
    337    MODIFY_FLAGS
    338  };
    339  static ActionNode* SetRegisterForLoop(int reg, int val,
    340                                        RegExpNode* on_success);
    341  static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
    342  static ActionNode* ClearPosition(int reg, RegExpNode* on_success);
    343  static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
    344  static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
    345  static ActionNode* BeginPositiveSubmatch(int stack_pointer_reg,
    346                                           int position_reg, RegExpNode* body,
    347                                           ActionNode* success_node);
    348  static ActionNode* BeginNegativeSubmatch(int stack_pointer_reg,
    349                                           int position_reg,
    350                                           RegExpNode* on_success);
    351  static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
    352                                             int restore_reg,
    353                                             int clear_capture_count,
    354                                             int clear_capture_from,
    355                                             RegExpNode* on_success);
    356  static ActionNode* EmptyMatchCheck(int start_register,
    357                                     int repetition_register,
    358                                     int repetition_limit,
    359                                     RegExpNode* on_success);
    360  static ActionNode* ModifyFlags(RegExpFlags flags, RegExpNode* on_success);
    361  ActionNode* AsActionNode() override { return this; }
    362  void Accept(NodeVisitor* visitor) override;
    363  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    364  void GetQuickCheckDetails(QuickCheckDetails* details,
    365                            RegExpCompiler* compiler, int filled_in,
    366                            bool not_at_start) override;
    367  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    368                    BoyerMooreLookahead* bm, bool not_at_start) override;
    369  ActionType action_type() const { return action_type_; }
    370  // TODO(erikcorry): We should allow some action nodes in fixed length loops.
    371  int FixedLengthLoopLength() override {
    372    return kNodeIsTooComplexForFixedLengthLoops;
    373  }
    374  RegExpFlags flags() const {
    375    DCHECK_EQ(action_type(), MODIFY_FLAGS);
    376    return RegExpFlags{data_.u_modify_flags.flags};
    377  }
    378  ActionNode* success_node() const {
    379    DCHECK_EQ(action_type(), BEGIN_POSITIVE_SUBMATCH);
    380    return data_.u_submatch.success_node;
    381  }
    382 
    383  bool Mentions(int reg) const {
    384    return base::IsInRange(reg, register_from(), register_to());
    385  }
    386 
    387  int value() const {
    388    DCHECK(action_type() == SET_REGISTER_FOR_LOOP);
    389    return data_.u_simple.value;
    390  }
    391 
    392  bool IsSimpleAction() const {
    393    return action_type() == CLEAR_POSITION ||
    394           action_type() == RESTORE_POSITION ||
    395           action_type() == INCREMENT_REGISTER ||
    396           action_type() == SET_REGISTER_FOR_LOOP ||
    397           action_type() == CLEAR_CAPTURES;
    398  }
    399 
    400  int register_from() const {
    401    DCHECK(IsSimpleAction());
    402    return data_.u_simple.register_from;
    403  }
    404 
    405  int register_to() const { return data_.u_simple.register_to; }
    406 
    407 protected:
    408  ActionNode(ActionType action_type, RegExpNode* on_success)
    409      : SeqRegExpNode(on_success), action_type_(action_type) {}
    410 
    411  ActionNode(ActionType action_type, RegExpNode* on_success, int from,
    412             int to = -1, int value = 0)
    413      : SeqRegExpNode(on_success), action_type_(action_type) {
    414    data_.u_simple.register_from = from;
    415    data_.u_simple.register_to = to == -1 ? from : to;
    416    data_.u_simple.value = value;
    417    DCHECK(IsSimpleAction());
    418  }
    419 
    420 private:
    421  union {
    422    struct {
    423      int register_from;
    424      int register_to;
    425      int value;
    426    } u_simple;
    427    struct {
    428      int stack_pointer_register;
    429      int current_position_register;
    430      int clear_register_count;
    431      int clear_register_from;
    432      ActionNode* success_node;  // Only used for positive submatch.
    433    } u_submatch;
    434    struct {
    435      int start_register;
    436      int repetition_register;
    437      int repetition_limit;
    438    } u_empty_match_check;
    439    struct {
    440      int flags;
    441    } u_modify_flags;
    442  } data_;
    443 
    444  ActionType action_type_;
    445  friend class DotPrinterImpl;
    446  friend Zone;
    447 };
    448 
    449 class TextNode : public SeqRegExpNode {
    450 public:
    451  TextNode(ZoneList<TextElement>* elms, bool read_backward,
    452           RegExpNode* on_success)
    453      : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
    454  TextNode(RegExpClassRanges* that, bool read_backward, RegExpNode* on_success)
    455      : SeqRegExpNode(on_success),
    456        elms_(zone()->New<ZoneList<TextElement>>(1, zone())),
    457        read_backward_(read_backward) {
    458    elms_->Add(TextElement::ClassRanges(that), zone());
    459  }
    460  // Create TextNode for a single character class for the given ranges.
    461  static TextNode* CreateForCharacterRanges(Zone* zone,
    462                                            ZoneList<CharacterRange>* ranges,
    463                                            bool read_backward,
    464                                            RegExpNode* on_success);
    465  // Create TextNode for a surrogate pair (i.e. match a sequence of two uc16
    466  // code unit ranges).
    467  static TextNode* CreateForSurrogatePair(
    468      Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
    469      bool read_backward, RegExpNode* on_success);
    470  static TextNode* CreateForSurrogatePair(Zone* zone,
    471                                          ZoneList<CharacterRange>* lead_ranges,
    472                                          CharacterRange trail,
    473                                          bool read_backward,
    474                                          RegExpNode* on_success);
    475  TextNode* AsTextNode() override { return this; }
    476  void Accept(NodeVisitor* visitor) override;
    477  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    478  void GetQuickCheckDetails(QuickCheckDetails* details,
    479                            RegExpCompiler* compiler, int characters_filled_in,
    480                            bool not_at_start) override;
    481  ZoneList<TextElement>* elements() { return elms_; }
    482  bool read_backward() { return read_backward_; }
    483  void MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
    484                           RegExpFlags flags);
    485  int FixedLengthLoopLength() override;
    486  RegExpNode* GetSuccessorOfOmnivorousTextNode(
    487      RegExpCompiler* compiler) override;
    488  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    489                    BoyerMooreLookahead* bm, bool not_at_start) override;
    490  void CalculateOffsets();
    491  RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
    492  int Length();
    493 
    494 private:
    495  enum TextEmitPassType {
    496    NON_LATIN1_MATCH,            // Check for characters that can never match.
    497    SIMPLE_CHARACTER_MATCH,      // Case-dependent single character check.
    498    NON_LETTER_CHARACTER_MATCH,  // Check characters that have no case equivs.
    499    CASE_CHARACTER_MATCH,        // Case-independent single character check.
    500    CHARACTER_CLASS_MATCH        // Character class.
    501  };
    502  void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
    503                    bool preloaded, Trace* trace, bool first_element_checked,
    504                    int* checked_up_to);
    505  ZoneList<TextElement>* elms_;
    506  bool read_backward_;
    507 };
    508 
    509 class AssertionNode : public SeqRegExpNode {
    510 public:
    511  enum AssertionType {
    512    AT_END,
    513    AT_START,
    514    AT_BOUNDARY,
    515    AT_NON_BOUNDARY,
    516    AFTER_NEWLINE
    517  };
    518  static AssertionNode* AtEnd(RegExpNode* on_success) {
    519    return on_success->zone()->New<AssertionNode>(AT_END, on_success);
    520  }
    521  static AssertionNode* AtStart(RegExpNode* on_success) {
    522    return on_success->zone()->New<AssertionNode>(AT_START, on_success);
    523  }
    524  static AssertionNode* AtBoundary(RegExpNode* on_success) {
    525    return on_success->zone()->New<AssertionNode>(AT_BOUNDARY, on_success);
    526  }
    527  static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
    528    return on_success->zone()->New<AssertionNode>(AT_NON_BOUNDARY, on_success);
    529  }
    530  static AssertionNode* AfterNewline(RegExpNode* on_success) {
    531    return on_success->zone()->New<AssertionNode>(AFTER_NEWLINE, on_success);
    532  }
    533  AssertionNode* AsAssertionNode() override { return this; }
    534  void Accept(NodeVisitor* visitor) override;
    535  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    536  void GetQuickCheckDetails(QuickCheckDetails* details,
    537                            RegExpCompiler* compiler, int filled_in,
    538                            bool not_at_start) override;
    539  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    540                    BoyerMooreLookahead* bm, bool not_at_start) override;
    541  AssertionType assertion_type() { return assertion_type_; }
    542 
    543 private:
    544  friend Zone;
    545 
    546  void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
    547  enum IfPrevious { kIsNonWord, kIsWord };
    548  void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace,
    549                           IfPrevious backtrack_if_previous);
    550  AssertionNode(AssertionType t, RegExpNode* on_success)
    551      : SeqRegExpNode(on_success), assertion_type_(t) {}
    552  AssertionType assertion_type_;
    553 };
    554 
    555 class BackReferenceNode : public SeqRegExpNode {
    556 public:
    557  BackReferenceNode(int start_reg, int end_reg, bool read_backward,
    558                    RegExpNode* on_success)
    559      : SeqRegExpNode(on_success),
    560        start_reg_(start_reg),
    561        end_reg_(end_reg),
    562        read_backward_(read_backward) {}
    563  BackReferenceNode* AsBackReferenceNode() override { return this; }
    564  void Accept(NodeVisitor* visitor) override;
    565  int start_register() { return start_reg_; }
    566  int end_register() { return end_reg_; }
    567  bool read_backward() { return read_backward_; }
    568  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    569  void GetQuickCheckDetails(QuickCheckDetails* details,
    570                            RegExpCompiler* compiler, int characters_filled_in,
    571                            bool not_at_start) override {
    572    return;
    573  }
    574  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    575                    BoyerMooreLookahead* bm, bool not_at_start) override;
    576 
    577 private:
    578  int start_reg_;
    579  int end_reg_;
    580  bool read_backward_;
    581 };
    582 
    583 class EndNode : public RegExpNode {
    584 public:
    585  enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
    586  EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
    587  EndNode* AsEndNode() override { return this; }
    588  void Accept(NodeVisitor* visitor) override;
    589  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    590  void GetQuickCheckDetails(QuickCheckDetails* details,
    591                            RegExpCompiler* compiler, int characters_filled_in,
    592                            bool not_at_start) override {
    593    // Returning 0 from EatsAtLeast should ensure we never get here.
    594    UNREACHABLE();
    595  }
    596  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    597                    BoyerMooreLookahead* bm, bool not_at_start) override {
    598    // Returning 0 from EatsAtLeast should ensure we never get here.
    599    UNREACHABLE();
    600  }
    601 
    602 private:
    603  Action action_;
    604 };
    605 
    606 class NegativeSubmatchSuccess : public EndNode {
    607 public:
    608  NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg,
    609                          int clear_capture_count, int clear_capture_start,
    610                          Zone* zone)
    611      : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
    612        stack_pointer_register_(stack_pointer_reg),
    613        current_position_register_(position_reg),
    614        clear_capture_count_(clear_capture_count),
    615        clear_capture_start_(clear_capture_start) {}
    616  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    617 
    618 private:
    619  int stack_pointer_register_;
    620  int current_position_register_;
    621  int clear_capture_count_;
    622  int clear_capture_start_;
    623 };
    624 
    625 class Guard : public ZoneObject {
    626 public:
    627  enum Relation { LT, GEQ };
    628  Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {}
    629  int reg() { return reg_; }
    630  Relation op() { return op_; }
    631  int value() { return value_; }
    632 
    633 private:
    634  int reg_;
    635  Relation op_;
    636  int value_;
    637 };
    638 
    639 class GuardedAlternative {
    640 public:
    641  explicit GuardedAlternative(RegExpNode* node)
    642      : node_(node), guards_(nullptr) {}
    643  void AddGuard(Guard* guard, Zone* zone);
    644  RegExpNode* node() { return node_; }
    645  void set_node(RegExpNode* node) { node_ = node; }
    646  ZoneList<Guard*>* guards() { return guards_; }
    647 
    648 private:
    649  RegExpNode* node_;
    650  ZoneList<Guard*>* guards_;
    651 };
    652 
    653 class AlternativeGeneration;
    654 
    655 class ChoiceNode : public RegExpNode {
    656 public:
    657  explicit ChoiceNode(int expected_size, Zone* zone)
    658      : RegExpNode(zone),
    659        alternatives_(
    660            zone->New<ZoneList<GuardedAlternative>>(expected_size, zone)),
    661        not_at_start_(false),
    662        being_calculated_(false) {}
    663  ChoiceNode* AsChoiceNode() override { return this; }
    664  void Accept(NodeVisitor* visitor) override;
    665  void AddAlternative(GuardedAlternative node) {
    666    alternatives()->Add(node, zone());
    667  }
    668  ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
    669  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    670  void GetQuickCheckDetails(QuickCheckDetails* details,
    671                            RegExpCompiler* compiler, int characters_filled_in,
    672                            bool not_at_start) override;
    673  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    674                    BoyerMooreLookahead* bm, bool not_at_start) override;
    675 
    676  bool being_calculated() { return being_calculated_; }
    677  bool not_at_start() { return not_at_start_; }
    678  void set_not_at_start() { not_at_start_ = true; }
    679  void set_being_calculated(bool b) { being_calculated_ = b; }
    680  virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
    681    return true;
    682  }
    683  RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
    684  virtual bool read_backward() { return false; }
    685 
    686 protected:
    687  int FixedLengthLoopLengthForAlternative(GuardedAlternative* alternative);
    688  ZoneList<GuardedAlternative>* alternatives_;
    689 
    690 private:
    691  template <typename...>
    692  friend class Analysis;
    693 
    694  void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard,
    695                     Trace* trace);
    696  int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
    697  void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace,
    698                                 GuardedAlternative alternative,
    699                                 AlternativeGeneration* alt_gen,
    700                                 int preload_characters,
    701                                 bool next_expects_preload);
    702  void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
    703                    PreloadState* preloads);
    704  void AssertGuardsMentionRegisters(Trace* trace);
    705  int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
    706  Trace* EmitFixedLengthLoop(RegExpCompiler* compiler, Trace* trace,
    707                             AlternativeGenerationList* alt_gens,
    708                             PreloadState* preloads,
    709                             FixedLengthLoopState* fixed_length_loop_state,
    710                             int text_length);
    711  void EmitChoices(RegExpCompiler* compiler,
    712                   AlternativeGenerationList* alt_gens, int first_choice,
    713                   Trace* trace, PreloadState* preloads);
    714 
    715  // If true, this node is never checked at the start of the input.
    716  // Allows a new trace to start with at_start() set to false.
    717  bool not_at_start_;
    718  bool being_calculated_;
    719 };
    720 
    721 class NegativeLookaroundChoiceNode : public ChoiceNode {
    722 public:
    723  explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
    724                                        GuardedAlternative then_do_this,
    725                                        Zone* zone)
    726      : ChoiceNode(2, zone) {
    727    AddAlternative(this_must_fail);
    728    AddAlternative(then_do_this);
    729  }
    730  void GetQuickCheckDetails(QuickCheckDetails* details,
    731                            RegExpCompiler* compiler, int characters_filled_in,
    732                            bool not_at_start) override;
    733  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    734                    BoyerMooreLookahead* bm, bool not_at_start) override {
    735    continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm,
    736                                  not_at_start);
    737    if (offset == 0) set_bm_info(not_at_start, bm);
    738  }
    739  static constexpr int kLookaroundIndex = 0;
    740  static constexpr int kContinueIndex = 1;
    741  RegExpNode* lookaround_node() {
    742    return alternatives()->at(kLookaroundIndex).node();
    743  }
    744  RegExpNode* continue_node() {
    745    return alternatives()->at(kContinueIndex).node();
    746  }
    747  // For a negative lookahead we don't emit the quick check for the
    748  // alternative that is expected to fail.  This is because quick check code
    749  // starts by loading enough characters for the alternative that takes fewest
    750  // characters, but on a negative lookahead the negative branch did not take
    751  // part in that calculation (EatsAtLeast) so the assumptions don't hold.
    752  bool try_to_emit_quick_check_for_alternative(bool is_first) override {
    753    return !is_first;
    754  }
    755  NegativeLookaroundChoiceNode* AsNegativeLookaroundChoiceNode() override {
    756    return this;
    757  }
    758  void Accept(NodeVisitor* visitor) override;
    759  RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
    760 };
    761 
    762 class LoopChoiceNode : public ChoiceNode {
    763 public:
    764  LoopChoiceNode(bool body_can_be_zero_length, bool read_backward,
    765                 int min_loop_iterations, Zone* zone)
    766      : ChoiceNode(2, zone),
    767        loop_node_(nullptr),
    768        continue_node_(nullptr),
    769        body_can_be_zero_length_(body_can_be_zero_length),
    770        read_backward_(read_backward),
    771        traversed_loop_initialization_node_(false),
    772        min_loop_iterations_(min_loop_iterations) {}
    773  void AddLoopAlternative(GuardedAlternative alt);
    774  void AddContinueAlternative(GuardedAlternative alt);
    775  void Emit(RegExpCompiler* compiler, Trace* trace) override;
    776  void GetQuickCheckDetails(QuickCheckDetails* details,
    777                            RegExpCompiler* compiler, int characters_filled_in,
    778                            bool not_at_start) override;
    779  void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
    780                                         RegExpCompiler* compiler,
    781                                         int characters_filled_in,
    782                                         bool not_at_start) override;
    783  void FillInBMInfo(Isolate* isolate, int offset, int budget,
    784                    BoyerMooreLookahead* bm, bool not_at_start) override;
    785  EatsAtLeastInfo EatsAtLeastFromLoopEntry() override;
    786  RegExpNode* loop_node() { return loop_node_; }
    787  RegExpNode* continue_node() { return continue_node_; }
    788  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
    789  int min_loop_iterations() const { return min_loop_iterations_; }
    790  bool read_backward() override { return read_backward_; }
    791  LoopChoiceNode* AsLoopChoiceNode() override { return this; }
    792  void Accept(NodeVisitor* visitor) override;
    793  RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
    794 
    795 private:
    796  // AddAlternative is made private for loop nodes because alternatives
    797  // should not be added freely, we need to keep track of which node
    798  // goes back to the node itself.
    799  void AddAlternative(GuardedAlternative node) {
    800    ChoiceNode::AddAlternative(node);
    801  }
    802 
    803  RegExpNode* loop_node_;
    804  RegExpNode* continue_node_;
    805  bool body_can_be_zero_length_;
    806  bool read_backward_;
    807 
    808  // Temporary marker set only while generating quick check details. Represents
    809  // whether GetQuickCheckDetails traversed the initialization node for this
    810  // loop's counter. If so, we may be able to generate stricter quick checks
    811  // because we know the loop node must match at least min_loop_iterations_
    812  // times before the continuation node can match.
    813  bool traversed_loop_initialization_node_;
    814 
    815  // The minimum number of times the loop_node_ must match before the
    816  // continue_node_ might be considered. This value can be temporarily decreased
    817  // while generating quick check details, to represent the remaining iterations
    818  // after the completed portion of the quick check details.
    819  int min_loop_iterations_;
    820 
    821  friend class IterationDecrementer;
    822  friend class LoopInitializationMarker;
    823 };
    824 
    825 class NodeVisitor {
    826 public:
    827  virtual ~NodeVisitor() = default;
    828 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0;
    829  FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    830 #undef DECLARE_VISIT
    831 };
    832 
    833 }  // namespace internal
    834 }  // namespace v8
    835 
    836 #endif  // V8_REGEXP_REGEXP_NODES_H_