tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regexp-ast.h (25810B)


      1 // Copyright 2016 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_REGEXP_REGEXP_AST_H_
      6 #define V8_REGEXP_REGEXP_AST_H_
      7 
      8 #include <optional>
      9 
     10 #include "irregexp/RegExpShim.h"
     11 
     12 #ifdef V8_INTL_SUPPORT
     13 #include "unicode/uniset.h"
     14 #endif  // V8_INTL_SUPPORT
     15 
     16 namespace v8::internal {
     17 
     18 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
     19  VISIT(Disjunction)                      \
     20  VISIT(Alternative)                      \
     21  VISIT(Assertion)                        \
     22  VISIT(ClassRanges)                      \
     23  VISIT(ClassSetOperand)                  \
     24  VISIT(ClassSetExpression)               \
     25  VISIT(Atom)                             \
     26  VISIT(Quantifier)                       \
     27  VISIT(Capture)                          \
     28  VISIT(Group)                            \
     29  VISIT(Lookaround)                       \
     30  VISIT(BackReference)                    \
     31  VISIT(Empty)                            \
     32  VISIT(Text)
     33 
     34 #define FORWARD_DECLARE(Name) class RegExp##Name;
     35 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
     36 #undef FORWARD_DECLARE
     37 
     38 class RegExpCompiler;
     39 class RegExpNode;
     40 class RegExpTree;
     41 
     42 class RegExpVisitor {
     43 public:
     44  virtual ~RegExpVisitor() = default;
     45 #define MAKE_CASE(Name) \
     46  virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
     47  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
     48 #undef MAKE_CASE
     49 };
     50 
     51 // A simple closed interval.
     52 class Interval {
     53 public:
     54  Interval() : from_(kNone), to_(kNone - 1) {}  // '- 1' for branchless size().
     55  Interval(int from, int to) : from_(from), to_(to) {}
     56  Interval Union(Interval that) {
     57    if (that.from_ == kNone) return *this;
     58    if (from_ == kNone) return that;
     59    return Interval(std::min(from_, that.from_), std::max(to_, that.to_));
     60  }
     61 
     62  static Interval Empty() { return Interval(); }
     63 
     64  bool Contains(int value) const { return (from_ <= value) && (value <= to_); }
     65  bool is_empty() const { return from_ == kNone; }
     66  int from() const { return from_; }
     67  int to() const { return to_; }
     68  int size() const { return to_ - from_ + 1; }
     69 
     70  static constexpr int kNone = -1;
     71 
     72 private:
     73  int from_;
     74  int to_;
     75 };
     76 
     77 // Named standard character sets.
     78 enum class StandardCharacterSet : char {
     79  kWhitespace = 's',         // Like /\s/.
     80  kNotWhitespace = 'S',      // Like /\S/.
     81  kWord = 'w',               // Like /\w/.
     82  kNotWord = 'W',            // Like /\W/.
     83  kDigit = 'd',              // Like /\d/.
     84  kNotDigit = 'D',           // Like /\D/.
     85  kLineTerminator = 'n',     // The inverse of /./.
     86  kNotLineTerminator = '.',  // Like /./.
     87  kEverything = '*',         // Matches every character, like /./s.
     88 };
     89 
     90 // Represents code points (with values up to 0x10FFFF) in the range from from_
     91 // to to_, both ends are inclusive.
     92 class CharacterRange {
     93 public:
     94  CharacterRange() = default;
     95  // For compatibility with the CHECK_OK macro.
     96  CharacterRange(void* null) { DCHECK_NULL(null); }  // NOLINT
     97 
     98  static inline CharacterRange Singleton(base::uc32 value) {
     99    return CharacterRange(value, value);
    100  }
    101  static inline CharacterRange Range(base::uc32 from, base::uc32 to) {
    102    DCHECK(0 <= from && to <= kMaxCodePoint);
    103    DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
    104    return CharacterRange(from, to);
    105  }
    106  static inline CharacterRange Everything() {
    107    return CharacterRange(0, kMaxCodePoint);
    108  }
    109 
    110  static inline ZoneList<CharacterRange>* List(Zone* zone,
    111                                               CharacterRange range) {
    112    ZoneList<CharacterRange>* list =
    113        zone->New<ZoneList<CharacterRange>>(1, zone);
    114    list->Add(range, zone);
    115    return list;
    116  }
    117 
    118  // Add class escapes. Add case equivalent closure for \w and \W if necessary.
    119  V8_EXPORT_PRIVATE static void AddClassEscape(
    120      StandardCharacterSet standard_character_set,
    121      ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents,
    122      Zone* zone);
    123  // Add case equivalents to ranges. Only used for /i, not for /ui or /vi, as
    124  // the semantics for unicode mode are slightly different.
    125  // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
    126  V8_EXPORT_PRIVATE static void AddCaseEquivalents(
    127      Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
    128      bool is_one_byte);
    129  // Add case equivalent code points to ranges. Only used for /ui and /vi, not
    130  // for /i, as the semantics for non-unicode mode are slightly different.
    131  // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
    132  static void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
    133                                        Zone* zone);
    134 
    135  bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; }
    136  base::uc32 from() const { return from_; }
    137  base::uc32 to() const { return to_; }
    138  bool IsEverything(base::uc32 max) const { return from_ == 0 && to_ >= max; }
    139  bool IsSingleton() const { return from_ == to_; }
    140 
    141  // Whether a range list is in canonical form: Ranges ordered by from value,
    142  // and ranges non-overlapping and non-adjacent.
    143  V8_EXPORT_PRIVATE static bool IsCanonical(
    144      const ZoneList<CharacterRange>* ranges);
    145  // Convert range list to canonical form. The characters covered by the ranges
    146  // will still be the same, but no character is in more than one range, and
    147  // adjacent ranges are merged. The resulting list may be shorter than the
    148  // original, but cannot be longer.
    149  static void Canonicalize(ZoneList<CharacterRange>* ranges);
    150  // Negate the contents of a character range in canonical form.
    151  static void Negate(const ZoneList<CharacterRange>* src,
    152                     ZoneList<CharacterRange>* dst, Zone* zone);
    153  // Intersect the contents of two character ranges in canonical form.
    154  static void Intersect(const ZoneList<CharacterRange>* lhs,
    155                        const ZoneList<CharacterRange>* rhs,
    156                        ZoneList<CharacterRange>* dst, Zone* zone);
    157  // Subtract the contents of |to_remove| from the contents of |src|.
    158  static void Subtract(const ZoneList<CharacterRange>* src,
    159                       const ZoneList<CharacterRange>* to_remove,
    160                       ZoneList<CharacterRange>* dst, Zone* zone);
    161  // Remove all ranges outside the one-byte range.
    162  static void ClampToOneByte(ZoneList<CharacterRange>* ranges);
    163  // Checks if two ranges (both need to be canonical) are equal.
    164  static bool Equals(const ZoneList<CharacterRange>* lhs,
    165                     const ZoneList<CharacterRange>* rhs);
    166 
    167 private:
    168  CharacterRange(base::uc32 from, base::uc32 to) : from_(from), to_(to) {}
    169 
    170  static constexpr int kMaxCodePoint = 0x10ffff;
    171 
    172  base::uc32 from_ = 0;
    173  base::uc32 to_ = 0;
    174 };
    175 
    176 inline bool operator==(const CharacterRange& lhs, const CharacterRange& rhs) {
    177  return lhs.from() == rhs.from() && lhs.to() == rhs.to();
    178 }
    179 inline bool operator!=(const CharacterRange& lhs, const CharacterRange& rhs) {
    180  return !operator==(lhs, rhs);
    181 }
    182 
    183 #define DECL_BOILERPLATE(Name)                                             \
    184  void* Accept(RegExpVisitor* visitor, void* data) override;               \
    185  RegExpNode* ToNodeImpl(RegExpCompiler* compiler, RegExpNode* on_success) \
    186      override;                                                            \
    187  RegExp##Name* As##Name() override;                                       \
    188  bool Is##Name() override
    189 
    190 class RegExpTree : public ZoneObject {
    191 public:
    192  static const int kInfinity = kMaxInt;
    193  virtual ~RegExpTree() = default;
    194  virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
    195  RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
    196  virtual RegExpNode* ToNodeImpl(RegExpCompiler* compiler,
    197                                 RegExpNode* on_success) = 0;
    198  virtual bool IsTextElement() { return false; }
    199  virtual bool IsAnchoredAtStart() { return false; }
    200  virtual bool IsAnchoredAtEnd() { return false; }
    201  virtual int min_match() = 0;
    202  virtual int max_match() = 0;
    203  // Returns the interval of registers used for captures within this
    204  // expression.
    205  virtual Interval CaptureRegisters() { return Interval::Empty(); }
    206  virtual void AppendToText(RegExpText* text, Zone* zone);
    207  V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone);
    208 #define MAKE_ASTYPE(Name)           \
    209  virtual RegExp##Name* As##Name(); \
    210  virtual bool Is##Name();
    211  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
    212 #undef MAKE_ASTYPE
    213 };
    214 
    215 class RegExpDisjunction final : public RegExpTree {
    216 public:
    217  explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
    218 
    219  DECL_BOILERPLATE(Disjunction);
    220 
    221  Interval CaptureRegisters() override;
    222  bool IsAnchoredAtStart() override;
    223  bool IsAnchoredAtEnd() override;
    224  int min_match() override { return min_match_; }
    225  int max_match() override { return max_match_; }
    226  ZoneList<RegExpTree*>* alternatives() const { return alternatives_; }
    227 
    228 private:
    229  bool SortConsecutiveAtoms(RegExpCompiler* compiler);
    230  void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
    231  void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
    232  ZoneList<RegExpTree*>* alternatives_;
    233  int min_match_;
    234  int max_match_;
    235 };
    236 
    237 class RegExpAlternative final : public RegExpTree {
    238 public:
    239  explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
    240 
    241  DECL_BOILERPLATE(Alternative);
    242 
    243  Interval CaptureRegisters() override;
    244  bool IsAnchoredAtStart() override;
    245  bool IsAnchoredAtEnd() override;
    246  int min_match() override { return min_match_; }
    247  int max_match() override { return max_match_; }
    248  ZoneList<RegExpTree*>* nodes() const { return nodes_; }
    249 
    250 private:
    251  ZoneList<RegExpTree*>* nodes_;
    252  int min_match_;
    253  int max_match_;
    254 };
    255 
    256 class RegExpAssertion final : public RegExpTree {
    257 public:
    258  enum class Type {
    259    START_OF_LINE = 0,
    260    START_OF_INPUT = 1,
    261    END_OF_LINE = 2,
    262    END_OF_INPUT = 3,
    263    BOUNDARY = 4,
    264    NON_BOUNDARY = 5,
    265    LAST_ASSERTION_TYPE = NON_BOUNDARY,
    266  };
    267  explicit RegExpAssertion(Type type) : assertion_type_(type) {}
    268 
    269  DECL_BOILERPLATE(Assertion);
    270 
    271  bool IsAnchoredAtStart() override;
    272  bool IsAnchoredAtEnd() override;
    273  int min_match() override { return 0; }
    274  int max_match() override { return 0; }
    275  Type assertion_type() const { return assertion_type_; }
    276 
    277 private:
    278  const Type assertion_type_;
    279 };
    280 
    281 class CharacterSet final {
    282 public:
    283  explicit CharacterSet(StandardCharacterSet standard_set_type)
    284      : standard_set_type_(standard_set_type) {}
    285  explicit CharacterSet(ZoneList<CharacterRange>* ranges) : ranges_(ranges) {}
    286 
    287  ZoneList<CharacterRange>* ranges(Zone* zone);
    288  StandardCharacterSet standard_set_type() const {
    289    return standard_set_type_.value();
    290  }
    291  void set_standard_set_type(StandardCharacterSet standard_set_type) {
    292    standard_set_type_ = standard_set_type;
    293  }
    294  bool is_standard() const { return standard_set_type_.has_value(); }
    295  V8_EXPORT_PRIVATE void Canonicalize();
    296 
    297 private:
    298  ZoneList<CharacterRange>* ranges_ = nullptr;
    299  std::optional<StandardCharacterSet> standard_set_type_;
    300 };
    301 
    302 class RegExpClassRanges final : public RegExpTree {
    303 public:
    304  // NEGATED: The character class is negated and should match everything but
    305  //     the specified ranges.
    306  // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
    307  //     surrogate and should not be unicode-desugared (crbug.com/641091).
    308  // IS_CASE_FOLDED: If case folding is required (/i), it was already
    309  //     performed on individual ranges and should not be applied again.
    310  enum Flag {
    311    NEGATED = 1 << 0,
    312    CONTAINS_SPLIT_SURROGATE = 1 << 1,
    313    IS_CASE_FOLDED = 1 << 2,
    314  };
    315  using ClassRangesFlags = base::Flags<Flag>;
    316 
    317  RegExpClassRanges(Zone* zone, ZoneList<CharacterRange>* ranges,
    318                    ClassRangesFlags class_ranges_flags = ClassRangesFlags())
    319      : set_(ranges), class_ranges_flags_(class_ranges_flags) {
    320    // Convert the empty set of ranges to the negated Everything() range.
    321    if (ranges->is_empty()) {
    322      ranges->Add(CharacterRange::Everything(), zone);
    323      class_ranges_flags_ ^= NEGATED;
    324    }
    325  }
    326  explicit RegExpClassRanges(StandardCharacterSet standard_set_type)
    327      : set_(standard_set_type), class_ranges_flags_() {}
    328 
    329  DECL_BOILERPLATE(ClassRanges);
    330 
    331  bool IsTextElement() override { return true; }
    332  int min_match() override { return 1; }
    333  // The character class may match two code units for unicode regexps.
    334  // TODO(yangguo): we should split this class for usage in TextElement, and
    335  //                make max_match() dependent on the character class content.
    336  int max_match() override { return 2; }
    337 
    338  void AppendToText(RegExpText* text, Zone* zone) override;
    339 
    340  // TODO(lrn): Remove need for complex version if is_standard that
    341  // recognizes a mangled standard set and just do { return set_.is_special(); }
    342  bool is_standard(Zone* zone);
    343  // Returns a value representing the standard character set if is_standard()
    344  // returns true.
    345  StandardCharacterSet standard_type() const {
    346    return set_.standard_set_type();
    347  }
    348 
    349  CharacterSet character_set() const { return set_; }
    350  ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
    351 
    352  bool is_negated() const { return (class_ranges_flags_ & NEGATED) != 0; }
    353  bool contains_split_surrogate() const {
    354    return (class_ranges_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
    355  }
    356  bool is_case_folded() const {
    357    return (class_ranges_flags_ & IS_CASE_FOLDED) != 0;
    358  }
    359 
    360 private:
    361  CharacterSet set_;
    362  ClassRangesFlags class_ranges_flags_;
    363 };
    364 
    365 struct CharacterClassStringLess {
    366  bool operator()(base::Vector<const base::uc32> lhs,
    367                  base::Vector<const base::uc32> rhs) const {
    368    // Longer strings first so we generate matches for the largest string
    369    // possible.
    370    if (lhs.length() != rhs.length()) {
    371      return lhs.length() > rhs.length();
    372    }
    373    for (int i = 0; i < lhs.length(); i++) {
    374      if (lhs[i] != rhs[i]) {
    375        return lhs[i] < rhs[i];
    376      }
    377    }
    378    return false;
    379  }
    380 };
    381 
    382 // A type used for strings as part of character classes (only possible in
    383 // unicode sets mode).
    384 // We use a ZoneMap instead of an UnorderedZoneMap because we need to match
    385 // the longest alternatives first. By using a ZoneMap with the custom comparator
    386 // we can avoid sorting before assembling the code.
    387 // Strings are likely short (the largest string in current unicode properties
    388 // consists of 10 code points).
    389 using CharacterClassStrings = ZoneMap<base::Vector<const base::uc32>,
    390                                      RegExpTree*, CharacterClassStringLess>;
    391 
    392 // TODO(pthier): If we are sure we don't want to use icu::UnicodeSets
    393 // (performance evaluation pending), this class can be merged with
    394 // RegExpClassRanges.
    395 class RegExpClassSetOperand final : public RegExpTree {
    396 public:
    397  RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
    398                        CharacterClassStrings* strings);
    399 
    400  DECL_BOILERPLATE(ClassSetOperand);
    401 
    402  bool IsTextElement() override { return true; }
    403  int min_match() override { return min_match_; }
    404  int max_match() override { return max_match_; }
    405 
    406  void Union(RegExpClassSetOperand* other, Zone* zone);
    407  void Intersect(RegExpClassSetOperand* other,
    408                 ZoneList<CharacterRange>* temp_ranges, Zone* zone);
    409  void Subtract(RegExpClassSetOperand* other,
    410                ZoneList<CharacterRange>* temp_ranges, Zone* zone);
    411 
    412  bool has_strings() const { return strings_ != nullptr && !strings_->empty(); }
    413  ZoneList<CharacterRange>* ranges() { return ranges_; }
    414  CharacterClassStrings* strings() {
    415    DCHECK_NOT_NULL(strings_);
    416    return strings_;
    417  }
    418 
    419 private:
    420  ZoneList<CharacterRange>* ranges_;
    421  CharacterClassStrings* strings_;
    422  int min_match_;
    423  int max_match_;
    424 };
    425 
    426 class RegExpClassSetExpression final : public RegExpTree {
    427 public:
    428  enum class OperationType { kUnion, kIntersection, kSubtraction };
    429 
    430  RegExpClassSetExpression(OperationType op, bool is_negated,
    431                           bool may_contain_strings,
    432                           ZoneList<RegExpTree*>* operands);
    433 
    434  DECL_BOILERPLATE(ClassSetExpression);
    435 
    436  // Create an empty class set expression (matches everything if |is_negated|,
    437  // nothing otherwise).
    438  static RegExpClassSetExpression* Empty(Zone* zone, bool is_negated);
    439 
    440  bool IsTextElement() override { return true; }
    441  int min_match() override { return 0; }
    442  int max_match() override { return max_match_; }
    443 
    444  OperationType operation() const { return operation_; }
    445  bool is_negated() const { return is_negated_; }
    446  bool may_contain_strings() const { return may_contain_strings_; }
    447  const ZoneList<RegExpTree*>* operands() const { return operands_; }
    448  ZoneList<RegExpTree*>* operands() { return operands_; }
    449 
    450 private:
    451  // Recursively evaluates the tree rooted at |root|, computing the valid
    452  // CharacterRanges and strings after applying all set operations.
    453  // The original tree will be modified by this method, so don't store pointers
    454  // to inner nodes of the tree somewhere else!
    455  // Modifying the tree in-place saves memory and speeds up multiple calls of
    456  // the method (e.g. when unrolling quantifiers).
    457  // |temp_ranges| is used for intermediate results, passed as parameter to
    458  // avoid allocating new lists all the time.
    459  static RegExpClassSetOperand* ComputeExpression(
    460      RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone);
    461 
    462  const OperationType operation_;
    463  bool is_negated_;
    464  const bool may_contain_strings_;
    465  ZoneList<RegExpTree*>* operands_ = nullptr;
    466  int max_match_;
    467 };
    468 
    469 class RegExpAtom final : public RegExpTree {
    470 public:
    471  explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {}
    472 
    473  DECL_BOILERPLATE(Atom);
    474 
    475  bool IsTextElement() override { return true; }
    476  int min_match() override { return data_.length(); }
    477  int max_match() override { return data_.length(); }
    478  void AppendToText(RegExpText* text, Zone* zone) override;
    479 
    480  base::Vector<const base::uc16> data() const { return data_; }
    481  int length() const { return data_.length(); }
    482 
    483 private:
    484  base::Vector<const base::uc16> data_;
    485 };
    486 
    487 class TextElement final {
    488 public:
    489  enum TextType { ATOM, CLASS_RANGES };
    490 
    491  static TextElement Atom(RegExpAtom* atom);
    492  static TextElement ClassRanges(RegExpClassRanges* class_ranges);
    493 
    494  int cp_offset() const { return cp_offset_; }
    495  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
    496  int length() const;
    497 
    498  TextType text_type() const { return text_type_; }
    499 
    500  RegExpTree* tree() const { return tree_; }
    501 
    502  RegExpAtom* atom() const {
    503    DCHECK(text_type() == ATOM);
    504    return reinterpret_cast<RegExpAtom*>(tree());
    505  }
    506 
    507  RegExpClassRanges* class_ranges() const {
    508    DCHECK(text_type() == CLASS_RANGES);
    509    return reinterpret_cast<RegExpClassRanges*>(tree());
    510  }
    511 
    512 private:
    513  TextElement(TextType text_type, RegExpTree* tree)
    514      : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
    515 
    516  int cp_offset_;
    517  TextType text_type_;
    518  RegExpTree* tree_;
    519 };
    520 
    521 class RegExpText final : public RegExpTree {
    522 public:
    523  explicit RegExpText(Zone* zone) : elements_(2, zone) {}
    524 
    525  DECL_BOILERPLATE(Text);
    526 
    527  bool IsTextElement() override { return true; }
    528  int min_match() override { return length_; }
    529  int max_match() override { return length_; }
    530  void AppendToText(RegExpText* text, Zone* zone) override;
    531  void AddElement(TextElement elm, Zone* zone) {
    532    elements_.Add(elm, zone);
    533    length_ += elm.length();
    534  }
    535  ZoneList<TextElement>* elements() { return &elements_; }
    536 
    537  bool StartsWithAtom() const;
    538  RegExpAtom* FirstAtom() const;
    539 
    540 private:
    541  ZoneList<TextElement> elements_;
    542  int length_ = 0;
    543 };
    544 
    545 class RegExpQuantifier final : public RegExpTree {
    546 public:
    547  enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
    548  RegExpQuantifier(int min, int max, QuantifierType type, int index,
    549                   RegExpTree* body)
    550      : body_(body),
    551        min_(min),
    552        max_(max),
    553        quantifier_type_(type),
    554        index_(index) {
    555    if (min > 0 && body->min_match() > kInfinity / min) {
    556      min_match_ = kInfinity;
    557    } else {
    558      min_match_ = min * body->min_match();
    559    }
    560    if (max > 0 && body->max_match() > kInfinity / max) {
    561      max_match_ = kInfinity;
    562    } else {
    563      max_match_ = max * body->max_match();
    564    }
    565  }
    566 
    567  DECL_BOILERPLATE(Quantifier);
    568 
    569  static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
    570                            RegExpCompiler* compiler, RegExpNode* on_success,
    571                            bool not_at_start = false);
    572  Interval CaptureRegisters() override;
    573  int min_match() override { return min_match_; }
    574  int max_match() override { return max_match_; }
    575  int min() const { return min_; }
    576  int max() const { return max_; }
    577  QuantifierType quantifier_type() const { return quantifier_type_; }
    578  int index() const { return index_; }
    579  bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
    580  bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
    581  bool is_greedy() const { return quantifier_type_ == GREEDY; }
    582  RegExpTree* body() const { return body_; }
    583 
    584 private:
    585  RegExpTree* body_;
    586  int min_;
    587  int max_;
    588  int min_match_;
    589  int max_match_;
    590  QuantifierType quantifier_type_;
    591  int index_;
    592 };
    593 
    594 class RegExpCapture final : public RegExpTree {
    595 public:
    596  explicit RegExpCapture(int index)
    597      : body_(nullptr),
    598        index_(index),
    599        min_match_(0),
    600        max_match_(0),
    601        name_(nullptr) {}
    602 
    603  DECL_BOILERPLATE(Capture);
    604 
    605  static RegExpNode* ToNode(RegExpTree* body, int index,
    606                            RegExpCompiler* compiler, RegExpNode* on_success);
    607  bool IsAnchoredAtStart() override;
    608  bool IsAnchoredAtEnd() override;
    609  Interval CaptureRegisters() override;
    610  int min_match() override { return min_match_; }
    611  int max_match() override { return max_match_; }
    612  RegExpTree* body() { return body_; }
    613  void set_body(RegExpTree* body) {
    614    body_ = body;
    615    min_match_ = body->min_match();
    616    max_match_ = body->max_match();
    617  }
    618  int index() const { return index_; }
    619  const ZoneVector<base::uc16>* name() const { return name_; }
    620  void set_name(const ZoneVector<base::uc16>* name) { name_ = name; }
    621  static int StartRegister(int index) { return index * 2; }
    622  static int EndRegister(int index) { return index * 2 + 1; }
    623 
    624 private:
    625  RegExpTree* body_ = nullptr;
    626  int index_;
    627  int min_match_ = 0;
    628  int max_match_ = 0;
    629  const ZoneVector<base::uc16>* name_ = nullptr;
    630 };
    631 
    632 class RegExpGroup final : public RegExpTree {
    633 public:
    634  explicit RegExpGroup(RegExpTree* body, RegExpFlags flags)
    635      : body_(body),
    636        flags_(flags),
    637        min_match_(body->min_match()),
    638        max_match_(body->max_match()) {}
    639 
    640  DECL_BOILERPLATE(Group);
    641 
    642  bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
    643  bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
    644  int min_match() override { return min_match_; }
    645  int max_match() override { return max_match_; }
    646  Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
    647  RegExpTree* body() const { return body_; }
    648  RegExpFlags flags() const { return flags_; }
    649 
    650 private:
    651  RegExpTree* body_;
    652  const RegExpFlags flags_;
    653  int min_match_;
    654  int max_match_;
    655 };
    656 
    657 class RegExpLookaround final : public RegExpTree {
    658 public:
    659  enum Type { LOOKAHEAD, LOOKBEHIND };
    660 
    661  RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
    662                   int capture_from, Type type, int index)
    663      : body_(body),
    664        is_positive_(is_positive),
    665        capture_count_(capture_count),
    666        capture_from_(capture_from),
    667        type_(type),
    668        index_(index) {}
    669 
    670  DECL_BOILERPLATE(Lookaround);
    671 
    672  Interval CaptureRegisters() override;
    673  bool IsAnchoredAtStart() override;
    674  int min_match() override { return 0; }
    675  int max_match() override { return 0; }
    676  RegExpTree* body() const { return body_; }
    677  bool is_positive() const { return is_positive_; }
    678  int capture_count() const { return capture_count_; }
    679  int capture_from() const { return capture_from_; }
    680  Type type() const { return type_; }
    681  int index() const { return index_; }
    682 
    683  class Builder {
    684   public:
    685    Builder(bool is_positive, RegExpNode* on_success,
    686            int stack_pointer_register, int position_register,
    687            int capture_register_count = 0, int capture_register_start = 0);
    688    RegExpNode* on_match_success() const { return on_match_success_; }
    689    RegExpNode* ForMatch(RegExpNode* match);
    690 
    691   private:
    692    bool is_positive_;
    693    RegExpNode* on_match_success_;
    694    RegExpNode* on_success_;
    695    int stack_pointer_register_;
    696    int position_register_;
    697  };
    698 
    699 private:
    700  RegExpTree* body_;
    701  bool is_positive_;
    702  int capture_count_;
    703  int capture_from_;
    704  Type type_;
    705  int index_;
    706 };
    707 
    708 class RegExpBackReference final : public RegExpTree {
    709 public:
    710  explicit RegExpBackReference(Zone* zone) : captures_(1, zone) {}
    711  explicit RegExpBackReference(RegExpCapture* capture, Zone* zone)
    712      : captures_(1, zone) {
    713    captures_.Add(capture, zone);
    714  }
    715 
    716  DECL_BOILERPLATE(BackReference);
    717 
    718  int min_match() override { return 0; }
    719  // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
    720  // recursion, we give up. Ignorance is bliss.
    721  int max_match() override { return kInfinity; }
    722  const ZoneList<RegExpCapture*>* captures() const { return &captures_; }
    723  void add_capture(RegExpCapture* capture, Zone* zone) {
    724    captures_.Add(capture, zone);
    725  }
    726  const ZoneVector<base::uc16>* name() const { return name_; }
    727  void set_name(const ZoneVector<base::uc16>* name) { name_ = name; }
    728 
    729 private:
    730  ZoneList<RegExpCapture*> captures_;
    731  const ZoneVector<base::uc16>* name_ = nullptr;
    732 };
    733 
    734 class RegExpEmpty final : public RegExpTree {
    735 public:
    736  DECL_BOILERPLATE(Empty);
    737  int min_match() override { return 0; }
    738  int max_match() override { return 0; }
    739 };
    740 
    741 }  // namespace v8::internal
    742 
    743 #undef DECL_BOILERPLATE
    744 
    745 #endif  // V8_REGEXP_REGEXP_AST_H_