tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regexp-bytecodes.h (35213B)


      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_REGEXP_REGEXP_BYTECODES_H_
      6 #define V8_REGEXP_REGEXP_BYTECODES_H_
      7 
      8 #include "irregexp/RegExpShim.h"
      9 
     10 namespace v8 {
     11 namespace internal {
     12 
     13 // Size of the padded bytecode id space: the number of actually defined
     14 // bytecodes rounded up to the next power of 2.
     15 // All slots between the last actually defined bytecode and maximum id will be
     16 // filled with BREAKs, indicating an invalid operation. This way using
     17 // BYTECODE_MASK guarantees no OOB access to the dispatch table.
     18 constexpr int kRegExpPaddedBytecodeCount = 1 << 6;  // 64 slots
     19 constexpr int BYTECODE_MASK = kRegExpPaddedBytecodeCount - 1;  // 0x3f
     20 // The first argument is packed into one word together with the bytecode, so it
     21 // has 24 bits. It can be positive or negative, so only 23 bits are usable for
     22 // positive values.
     23 const unsigned int MAX_FIRST_ARG = 0x7fffffu;  // 2^23 - 1
     24 const int BYTECODE_SHIFT = 8;  // presumably where the packed argument starts
     25 static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK);  // mask fits below shift
     26 
     27 // Basic operand types, each of which maps directly onto a single C type.
     28 // Getters/Setters for these are fully auto-generated.
     29 // Format: V(Name, C type). Names may share a C type (Label/Uint32).
     30 #define BASIC_BYTECODE_OPERAND_TYPE_LIST(V) \
     31  V(Int16, int16_t)                         \
     32  V(Int32, int32_t)                         \
     33  V(Uint32, uint32_t)                       \
     34  V(Char, base::uc16)                       \
     35  V(Label, uint32_t)                        \
     36  V(Offset, int16_t)                        \
     37  V(Register, uint16_t)
     38 
     39 // Special operand types with no direct C-type mapping; their Getters/Setters
     40 // are specialized manually. Format: V(Name, N), N presumably a size in bytes.
     41 #define SPECIAL_BYTECODE_OPERAND_TYPE_LIST(V)                              \
     42  V(BitTable, 16)                                                          \
     43  V(Padding, 2) /* TODO(pthier): padding is only required for backwards    \
     44  compatibility with the old layout. It can be removed after everything is \
     45  using the new layout. */
     46 
     47 #define BYTECODE_OPERAND_TYPE_LIST(V) /* all operand types, basic first */ \
     48  BASIC_BYTECODE_OPERAND_TYPE_LIST(V) \
     49  SPECIAL_BYTECODE_OPERAND_TYPE_LIST(V)
     50 
     51 enum class RegExpBytecodeOperandType : uint8_t {  // one enumerator per entry
     52 #define DECLARE_OPERAND(Name, ...) k##Name,  // keeps the name, drops the rest
     53  BYTECODE_OPERAND_TYPE_LIST(DECLARE_OPERAND)
     54 #undef DECLARE_OPERAND
     55 };
     56 
     57 using ReBcOpType = RegExpBytecodeOperandType;  // short alias for the lists below
     58 
     59 // Bytecodes that indicate something is invalid. These don't have a direct
     60 // equivalent in RegExpMacroAssembler.
     61 // It's a requirement that BREAK has an enum value of 0 (as e.g. jumps to offset
     62 // 0 are considered invalid).
     63 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
     64 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
     65 // enum and bytecode layouts are compatible with the old one. Remove once all
     66 // uses have been migrated.
     67 #define INVALID_BYTECODE_LIST(V) V(Break, BREAK, (), ())
     68 
     69 // Basic Bytecodes. These have a direct equivalent in the RegExpMacroAssembler.
     70 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
     71 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
     72 // enum and bytecode layouts are compatible with the old one. Remove once all
     73 // uses have been migrated. Entries follow the BYTECODE_ITERATOR code order.
     74 #define BASIC_BYTECODE_LIST(V)                                                 \
     75  V(PushCurrentPosition, PUSH_CP, (), ())                                      \
     76  V(PushBacktrack, PUSH_BT, (on_bt_pushed), (ReBcOpType::kLabel))              \
     77  V(WriteCurrentPositionToRegister, SET_REGISTER_TO_CP,                        \
     78    (register_index, cp_offset), (ReBcOpType::kRegister, ReBcOpType::kOffset)) \
     79  V(ReadCurrentPositionFromRegister, SET_CP_TO_REGISTER, (register_index),     \
     80    (ReBcOpType::kRegister))                                                   \
     81  V(WriteStackPointerToRegister, SET_REGISTER_TO_SP, (register_index),         \
     82    (ReBcOpType::kRegister))                                                   \
     83  V(ReadStackPointerFromRegister, SET_SP_TO_REGISTER, (register_index),        \
     84    (ReBcOpType::kRegister))                                                   \
     85  V(SetRegister, SET_REGISTER, (register_index, value),                        \
     86    (ReBcOpType::kRegister, ReBcOpType::kInt32))                               \
     87  V(AdvanceRegister, ADVANCE_REGISTER, (register_index, by),                   \
     88    (ReBcOpType::kRegister, ReBcOpType::kOffset))                              \
     89  V(PopCurrentPosition, POP_CP, (), ())                                        \
     90  V(PopBacktrack, POP_BT, (return_code), (ReBcOpType::kInt16))                 \
     91  V(PopRegister, POP_REGISTER, (register_index), (ReBcOpType::kRegister))      \
     92  V(Fail, FAIL, (), ())                                                        \
     93  V(Succeed, SUCCEED, (), ())                                                  \
     94  V(AdvanceCurrentPosition, ADVANCE_CP, (by), (ReBcOpType::kOffset))           \
     95  /* Jump to another bytecode given its offset.                             */ \
     96  V(GoTo, GOTO, (label), (ReBcOpType::kLabel))                                 \
     97  /* Check if offset is in range and load character at given offset.        */ \
     98  V(LoadCurrentCharacter, LOAD_CURRENT_CHAR, (cp_offset, on_failure),          \
     99    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    100  /* Check if current character is equal to a given character               */ \
    101  V(CheckCharacter, CHECK_CHAR, (character, on_equal),                         \
    102    (ReBcOpType::kChar, ReBcOpType::kLabel))                                   \
    103  V(CheckNotCharacter, CHECK_NOT_CHAR, (character, on_not_equal),              \
    104    (ReBcOpType::kChar, ReBcOpType::kLabel))                                   \
    105  /* Checks if the current character combined with mask (bitwise and)       */ \
    106  /* matches a character (e.g. used when two characters in a disjunction    */ \
    107  /* differ by only a single bit).                                          */ \
    108  V(CheckCharacterAfterAnd, AND_CHECK_CHAR, (character, mask, on_equal),       \
    109    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel))                \
    110  V(CheckNotCharacterAfterAnd, AND_CHECK_NOT_CHAR,                             \
    111    (character, mask, on_not_equal),                                           \
    112    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel))                \
    113  V(CheckNotCharacterAfterMinusAnd, MINUS_AND_CHECK_NOT_CHAR,                  \
    114    (character, minus, mask, on_not_equal),                                    \
    115    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kChar,                  \
    116     ReBcOpType::kLabel))                                                      \
    117  V(CheckCharacterInRange, CHECK_CHAR_IN_RANGE,                                \
    118    (padding, from, to, on_in_range),                                          \
    119    (ReBcOpType::kPadding, ReBcOpType::kChar, ReBcOpType::kChar,               \
    120     ReBcOpType::kLabel))                                                      \
    121  V(CheckCharacterNotInRange, CHECK_CHAR_NOT_IN_RANGE,                         \
    122    (padding, from, to, on_not_in_range),                                      \
    123    (ReBcOpType::kPadding, ReBcOpType::kChar, ReBcOpType::kChar,               \
    124     ReBcOpType::kLabel))                                                      \
    125  V(CheckLt, CHECK_LT, (limit, on_less),                                       \
    126    (ReBcOpType::kChar, ReBcOpType::kLabel))                                   \
    127  V(CheckGt, CHECK_GT, (limit, on_greater),                                    \
    128    (ReBcOpType::kChar, ReBcOpType::kLabel))                                   \
    129  /* TODO(pthier): CheckNotBackRef variants could be merged into a single */   \
    130  /* Bytecode without increasing the size */                                   \
    131  V(CheckNotBackRef, CHECK_NOT_BACK_REF, (start_reg, on_not_equal),            \
    132    (ReBcOpType::kRegister, ReBcOpType::kLabel))                               \
    133  V(CheckNotBackRefNoCase, CHECK_NOT_BACK_REF_NO_CASE,                         \
    134    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel))    \
    135  V(CheckNotBackRefNoCaseUnicode, CHECK_NOT_BACK_REF_NO_CASE_UNICODE,          \
    136    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel))    \
    137  V(CheckNotBackRefBackward, CHECK_NOT_BACK_REF_BACKWARD,                      \
    138    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel))    \
    139  V(CheckNotBackRefNoCaseBackward, CHECK_NOT_BACK_REF_NO_CASE_BACKWARD,        \
    140    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel))    \
    141  V(CheckNotBackRefNoCaseUnicodeBackward,                                      \
    142    CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, (start_reg, on_not_equal),    \
    143    (ReBcOpType::kRegister, ReBcOpType::kLabel))                               \
    144  V(CheckNotRegsEqual, CHECK_NOT_REGS_EQUAL, (reg1, reg2, on_not_equal),       \
    145    (ReBcOpType::kRegister, ReBcOpType::kRegister,                             \
    146     ReBcOpType::kLabel)) /* TODO(pthier): This Bytecode is unused. */         \
    147  V(IfRegisterLT, CHECK_REGISTER_LT,                                           \
    148    (register_index, comparand, on_less_than),                                 \
    149    (ReBcOpType::kRegister, ReBcOpType::kInt32, ReBcOpType::kLabel))           \
    150  V(IfRegisterGE, CHECK_REGISTER_GE,                                           \
    151    (register_index, comparand, on_greater_or_equal),                          \
    152    (ReBcOpType::kRegister, ReBcOpType::kInt32, ReBcOpType::kLabel))           \
    153  V(IfRegisterEqPos, CHECK_REGISTER_EQ_POS, (register_index, on_eq),           \
    154    (ReBcOpType::kRegister, ReBcOpType::kLabel))                               \
    155  V(CheckAtStart, CHECK_AT_START, (cp_offset, on_at_start),                    \
    156    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    157  V(CheckNotAtStart, CHECK_NOT_AT_START, (cp_offset, on_not_at_start),         \
    158    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    159  /* Checks if the current position matches top of backtrack stack          */ \
    160  V(CheckFixedLengthLoop, CHECK_FIXED_LENGTH,                                  \
    161    (on_tos_equals_current_position), (ReBcOpType::kLabel))                    \
    162  /* Takes a single offset operand and no label. TODO: document semantics.  */ \
    163  V(SetCurrentPositionFromEnd, SET_CURRENT_POSITION_FROM_END, (by),            \
    164    (ReBcOpType::kOffset))
    165 
    166 // Bytecodes dealing with multiple characters, introduced due to special logic
    167 // in the bytecode-generator or requiring additional logic when assembling.
    168 // These share a method with Basic Bytecodes in RegExpMacroAssembler.
    169 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
    170 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
    171 // enum and bytecode layouts are compatible with the old one. Remove once all
    172 // uses have been migrated.
    173 #define SPECIAL_BYTECODE_LIST(V)                                               \
    174  V(PushRegister, PUSH_REGISTER, (register_index), (ReBcOpType::kRegister))    \
    175  /* Load character at given offset without range checks.                   */ \
    176  V(LoadCurrentCharacterUnchecked, LOAD_CURRENT_CHAR_UNCHECKED, (cp_offset),   \
    177    (ReBcOpType::kOffset))                                                     \
    178  /* Checks if the current character matches any of the characters encoded  */ \
    179  /* in a bit table. Similar to/inspired by Boyer-Moore string search       */ \
    180  /* TODO(pthier): Change order to (table, label) and move to Basic */         \
    181  V(CheckBitInTable, CHECK_BIT_IN_TABLE, (on_bit_set, table),                  \
    182    (ReBcOpType::kLabel, ReBcOpType::kBitTable))                               \
    183  V(Load2CurrentChars, LOAD_2_CURRENT_CHARS, (cp_offset, on_failure),          \
    184    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    185  V(Load2CurrentCharsUnchecked, LOAD_2_CURRENT_CHARS_UNCHECKED, (cp_offset),   \
    186    (ReBcOpType::kOffset))                                                     \
    187  V(Load4CurrentChars, LOAD_4_CURRENT_CHARS, (cp_offset, on_failure),          \
    188    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    189  V(Load4CurrentCharsUnchecked, LOAD_4_CURRENT_CHARS_UNCHECKED, (cp_offset),   \
    190    (ReBcOpType::kOffset))                                                     \
    191  V(Check4Chars, CHECK_4_CHARS, (characters, on_equal),                        \
    192    (ReBcOpType::kUint32, ReBcOpType::kLabel))                                 \
    193  V(CheckNot4Chars, CHECK_NOT_4_CHARS, (characters, on_not_equal),             \
    194    (ReBcOpType::kUint32, ReBcOpType::kLabel))                                 \
    195  V(AndCheck4Chars, AND_CHECK_4_CHARS, (characters, mask, on_equal),           \
    196    (ReBcOpType::kUint32, ReBcOpType::kUint32, ReBcOpType::kLabel))            \
    197  V(AndCheckNot4Chars, AND_CHECK_NOT_4_CHARS,                                  \
    198    (characters, mask, on_not_equal),                                          \
    199    (ReBcOpType::kUint32, ReBcOpType::kUint32, ReBcOpType::kLabel))            \
    200  V(AdvanceCpAndGoto, ADVANCE_CP_AND_GOTO, (by, on_goto),                      \
    201    (ReBcOpType::kOffset, ReBcOpType::kLabel))                                 \
    202  /* Checks if current position + given offset is in range.                 */ \
    203  V(CheckCurrentPosition, CHECK_CURRENT_POSITION, (cp_offset, on_failure),     \
    204    (ReBcOpType::kOffset, ReBcOpType::kLabel))
    205 
    206 // Bytecodes generated by peephole optimization. These don't have a direct
    207 // equivalent in the RegExpMacroAssembler.
    208 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
    209 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
    210 // enum and bytecode layouts are compatible with the old one. Remove once all
    211 // uses have been migrated.
    212 #define PEEPHOLE_BYTECODE_LIST(V)                                              \
    213  /* Combination of:                                                        */ \
    214  /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO          */ \
    215  V(SkipUntilBitInTable, SKIP_UNTIL_BIT_IN_TABLE,                              \
    216    (cp_offset, advance_by, table, on_match, on_no_match),                     \
    217    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kBitTable,          \
    218     ReBcOpType::kLabel, ReBcOpType::kLabel))                                  \
    219  /* Combination of:                                                        */ \
    220  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR    */ \
    221  /* and ADVANCE_CP_AND_GOTO                                                */ \
    222  V(SkipUntilCharAnd, SKIP_UNTIL_CHAR_AND,                                     \
    223    (cp_offset, advance_by, character, mask, eats_at_least, on_match,          \
    224     on_no_match),                                                             \
    225    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar,              \
    226     ReBcOpType::kChar, ReBcOpType::kUint32, ReBcOpType::kLabel,               \
    227     ReBcOpType::kLabel)) /* TODO(pthier): eats_at_least should be Offset */   \
    228  /* Combination of:                                                        */ \
    229  /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO                  */ \
    230  V(SkipUntilChar, SKIP_UNTIL_CHAR,                                            \
    231    (cp_offset, advance_by, character, on_match, on_no_match),                 \
    232    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar,              \
    233     ReBcOpType::kLabel, ReBcOpType::kLabel))                                  \
    234  /* Combination of:                                                        */ \
    235  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR        */ \
    236  /* and ADVANCE_CP_AND_GOTO                                                */ \
    237  V(SkipUntilCharPosChecked, SKIP_UNTIL_CHAR_POS_CHECKED,                      \
    238    (cp_offset, advance_by, character, eats_at_least, on_match, on_no_match),  \
    239    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar,              \
    240     ReBcOpType::kUint32, ReBcOpType::kLabel, ReBcOpType::kLabel))             \
    241  /* TODO(pthier): eats_at_least should be Offset instead of Uint32 */         \
    242  /* Combination of:                                                        */ \
    243  /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO      */ \
    244  V(SkipUntilCharOrChar, SKIP_UNTIL_CHAR_OR_CHAR,                              \
    245    (cp_offset, advance_by, padding, char1, char2, on_match, on_no_match),     \
    246    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kPadding,           \
    247     ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel,                 \
    248     ReBcOpType::kLabel))                                                      \
    249  /* Combination of:                                                        */ \
    250  /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO                  */ \
    251  /* and ADVANCE_CP_AND_GOTO                                                */ \
    252  V(SkipUntilGtOrNotBitInTable, SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE,             \
    253    (cp_offset, advance_by, character, table, on_match, on_no_match),          \
    254    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar,              \
    255     ReBcOpType::kBitTable, ReBcOpType::kLabel, ReBcOpType::kLabel))
    256 
    257 #define REGEXP_BYTECODE_LIST(V) /* every bytecode list, invalid first */ \
    258  INVALID_BYTECODE_LIST(V)      \
    259  BASIC_BYTECODE_LIST(V)        \
    260  SPECIAL_BYTECODE_LIST(V)      \
    261  PEEPHOLE_BYTECODE_LIST(V)
    262 
    263 // The list of bytecodes, in format: V(Name, Code, ByteLength).
    264 // TODO(pthier): Argument offsets of bytecodes should be easily accessible by
    265 // name or at least by position.
    266 // TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32).
    267 #define BYTECODE_ITERATOR(V)                                                   \
    268  V(BREAK, 0, 4)              /* bc8                                        */ \
    269  V(PUSH_CP, 1, 4)            /* bc8 pad24                                  */ \
    270  V(PUSH_BT, 2, 8)            /* bc8 pad24 offset32                         */ \
    271  V(SET_REGISTER_TO_CP, 3, 8) /* bc8 reg_idx24 offset32                     */ \
    272  V(SET_CP_TO_REGISTER, 4, 4) /* bc8 reg_idx24                              */ \
    273  V(SET_REGISTER_TO_SP, 5, 4) /* bc8 reg_idx24                              */ \
    274  V(SET_SP_TO_REGISTER, 6, 4) /* bc8 reg_idx24                              */ \
    275  V(SET_REGISTER, 7, 8)       /* bc8 reg_idx24 value32                      */ \
    276  V(ADVANCE_REGISTER, 8, 8)   /* bc8 reg_idx24 value32                      */ \
    277  V(POP_CP, 9, 4)             /* bc8 pad24                                  */ \
    278  V(POP_BT, 10, 4)            /* bc8 pad24                                  */ \
    279  V(POP_REGISTER, 11, 4)      /* bc8 reg_idx24                              */ \
    280  V(FAIL, 12, 4)              /* bc8 pad24                                  */ \
    281  V(SUCCEED, 13, 4)           /* bc8 pad24                                  */ \
    282  V(ADVANCE_CP, 14, 4)        /* bc8 offset24                               */ \
    283  /* Jump to another bytecode given its offset.                             */ \
    284  /* Bit Layout:                                                            */ \
    285  /* 0x00 - 0x07:   0x0f (fixed) Bytecode                                   */ \
    286  /* 0x08 - 0x1F:   0x00 (unused) Padding                                   */ \
    287  /* 0x20 - 0x3F:   Address of bytecode to jump to                          */ \
    288  V(GOTO, 15, 8) /* bc8 pad24 addr32                                        */ \
    289  /* Check if offset is in range and load character at given offset.        */ \
    290  /* Bit Layout:                                                            */ \
    291  /* 0x00 - 0x07:   0x10 (fixed) Bytecode                                   */ \
    292  /* 0x08 - 0x1F:   Offset from current position                            */ \
    293  /* 0x20 - 0x3F:   Address of bytecode when load is out of range           */ \
    294  V(LOAD_CURRENT_CHAR, 16, 8) /* bc8 offset24 addr32                        */ \
    295  /* Check if current character is equal to a given character               */ \
    296  /* Bit Layout:                                                            */ \
    297  /* 0x00 - 0x07:   0x11 (fixed) Bytecode                                   */ \
    298  /* 0x08 - 0x0F:   0x00 (unused) Padding                                   */ \
    299  /* 0x10 - 0x1F:   Character to check                                      */ \
    300  /* 0x20 - 0x3F:   Address of bytecode when matched                        */ \
    301  V(CHECK_CHAR, 17, 8)     /* bc8 pad8 uint16 addr32                        */ \
    302  V(CHECK_NOT_CHAR, 18, 8) /* bc8 pad8 uint16 addr32                        */ \
    303  /* Checks if the current character combined with mask (bitwise and)       */ \
    304  /* matches a character (e.g. used when two characters in a disjunction    */ \
    305  /* differ by only a single bit                                            */ \
    306  /* Bit Layout:                                                            */ \
    307  /* 0x00 - 0x07:   0x13 (fixed) Bytecode                                   */ \
    308  /* 0x08 - 0x0F:   0x00 (unused) Padding                                   */ \
    309  /* 0x10 - 0x1F:   Character to match against (after mask applied)         */ \
    310  /* 0x20 - 0x3F:   Bitmask bitwise and combined with current character     */ \
    311  /* 0x40 - 0x5F:   Address of bytecode when matched                        */ \
    312  V(AND_CHECK_CHAR, 19, 12)     /* bc8 pad8 uint16 uint32 addr32            */ \
    313  V(AND_CHECK_NOT_CHAR, 20, 12) /* bc8 pad8 uint16 uint32 addr32            */ \
    314  V(MINUS_AND_CHECK_NOT_CHAR, 21,                                              \
    315    12) /* bc8 pad8 base::uc16 base::uc16 base::uc16 addr32                 */ \
    316  V(CHECK_CHAR_IN_RANGE, 22, 12) /* bc8 pad24 base::uc16 base::uc16 addr32  */ \
    317  V(CHECK_CHAR_NOT_IN_RANGE, 23,                                               \
    318    12) /* bc8 pad24 base::uc16 base::uc16 addr32                           */ \
    319  V(CHECK_LT, 24, 8)                   /* bc8 pad8 base::uc16 addr32        */ \
    320  V(CHECK_GT, 25, 8)                   /* bc8 pad8 base::uc16 addr32        */ \
    321  V(CHECK_NOT_BACK_REF, 26, 8)         /* bc8 reg_idx24 addr32              */ \
    322  V(CHECK_NOT_BACK_REF_NO_CASE, 27, 8) /* bc8 reg_idx24 addr32              */ \
    323  V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 28, 8)                                 \
    324  V(CHECK_NOT_BACK_REF_BACKWARD, 29, 8)         /* bc8 reg_idx24 addr32     */ \
    325  V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 30, 8) /* bc8 reg_idx24 addr32     */ \
    326  V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 31, 8)                        \
    327  V(CHECK_NOT_REGS_EQUAL, 32, 12) /* bc8 regidx24 reg_idx32 addr32          */ \
    328  V(CHECK_REGISTER_LT, 33, 12)    /* bc8 reg_idx24 value32 addr32           */ \
    329  V(CHECK_REGISTER_GE, 34, 12)    /* bc8 reg_idx24 value32 addr32           */ \
    330  V(CHECK_REGISTER_EQ_POS, 35, 8) /* bc8 reg_idx24 addr32                   */ \
    331  V(CHECK_AT_START, 36, 8)        /* bc8 pad24 addr32                       */ \
    332  V(CHECK_NOT_AT_START, 37, 8)    /* bc8 offset24 addr32                    */ \
    333  /* Checks if the current position matches top of backtrack stack          */ \
    334  /* Bit Layout:                                                            */ \
    335  /* 0x00 - 0x07:   0x26 (fixed) Bytecode                                   */ \
    336  /* 0x08 - 0x1F:   0x00 (unused) Padding                                   */ \
    337  /* 0x20 - 0x3F:   Address of bytecode when current matches tos            */ \
    338  V(CHECK_FIXED_LENGTH, 38, 8) /* bc8 pad24 addr32                          */ \
    339  /* Advance character pointer by given offset and jump to another bytecode.*/ \
    340  /* Bit Layout:                                                            */ \
    341  /* 0x00 - 0x07:   0x32 (fixed) Bytecode                                   */ \
    342  /* 0x08 - 0x1F:   Number of characters to advance                         */ \
    343  /* 0x20 - 0x3F:   Address of bytecode to jump to                          */ \
    344  V(SET_CURRENT_POSITION_FROM_END, 39, 4) /* bc8 idx24                      */ \
    345  V(PUSH_REGISTER, 40, 4)                 /* bc8 reg_idx24                  */ \
    346  /* Load character at given offset without range checks.                   */ \
    347  /* Bit Layout:                                                            */ \
    348  /* 0x00 - 0x07:   0x29 (fixed) Bytecode                                   */ \
    349  /* 0x08 - 0x1F:   Offset from current position                            */ \
    350  V(LOAD_CURRENT_CHAR_UNCHECKED, 41, 4) /* bc8 offset24                     */ \
    351  /* Checks if the current character matches any of the characters encoded  */ \
    352  /* in a bit table. Similar to/inspired by boyer moore string search       */ \
    353  /* Bit Layout:                                                            */ \
    354  /* 0x00 - 0x07:   0x2a (fixed) Bytecode                                   */ \
    355  /* 0x08 - 0x1F:   0x00 (unused) Padding                                   */ \
    356  /* 0x20 - 0x3F:   Address of bytecode when bit is set                     */ \
    357  /* 0x40 - 0xBF:   Bit table                                               */ \
    358  V(CHECK_BIT_IN_TABLE, 42, 24)            /* bc8 pad24 addr32 bits128      */ \
    359  V(LOAD_2_CURRENT_CHARS, 43, 8)           /* bc8 offset24 addr32           */ \
    360  V(LOAD_2_CURRENT_CHARS_UNCHECKED, 44, 4) /* bc8 offset24                  */ \
    361  V(LOAD_4_CURRENT_CHARS, 45, 8)           /* bc8 offset24 addr32           */ \
    362  V(LOAD_4_CURRENT_CHARS_UNCHECKED, 46, 4) /* bc8 offset24                  */ \
    363  V(CHECK_4_CHARS, 47, 12)                 /* bc8 pad24 uint32 addr32       */ \
    364  V(CHECK_NOT_4_CHARS, 48, 12)             /* bc8 pad24 uint32 addr32       */ \
    365  V(AND_CHECK_4_CHARS, 49, 16)             /* bc8 pad24 uint32 uint32 addr32*/ \
    366  V(AND_CHECK_NOT_4_CHARS, 50, 16)         /* bc8 pad24 uint32 uint32 addr32*/ \
    367  V(ADVANCE_CP_AND_GOTO, 51, 8)            /* bc8 offset24 addr32           */ \
    368  /* Checks if current position + given offset is in range.                 */ \
    369  /* Bit Layout:                                                            */ \
    370  /* 0x00 - 0x07:   0x34 (fixed) Bytecode                                   */ \
    371  /* 0x08 - 0x1F:   Offset from current position                            */ \
    372  /* 0x20 - 0x3F:   Address of bytecode when position is out of range       */ \
    373  V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32                      */ \
    374  /* Combination of:                                                        */ \
    375  /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO          */ \
    376  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    377  /* Bit Layout:                                                            */ \
    378  /* 0x00 - 0x07    0x35 (fixed) Bytecode                                   */ \
    379  /* 0x08 - 0x1F    Load character offset from current position             */ \
    380  /* 0x20 - 0x3F    Number of characters to advance                         */ \
    381  /* 0x40 - 0xBF    Bit Table                                               */ \
    382  /* 0xC0 - 0xDF    Address of bytecode when character is matched           */ \
    383  /* 0xE0 - 0xFF    Address of bytecode when no match                       */ \
    384  V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32)                                           \
    385  /* Combination of:                                                        */ \
    386  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR    */ \
    387  /* and ADVANCE_CP_AND_GOTO                                                */ \
    388  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    389  /* Bit Layout:                                                            */ \
    390  /* 0x00 - 0x07    0x36 (fixed) Bytecode                                   */ \
    391  /* 0x08 - 0x1F    Load character offset from current position             */ \
    392  /* 0x20 - 0x2F    Number of characters to advance                         */ \
    393  /* 0x30 - 0x3F    Character to match against (after mask applied)         */ \
    394  /* 0x40 - 0x5F:   Bitmask bitwise and combined with current character     */ \
    395  /* 0x60 - 0x7F    Minimum number of characters this pattern consumes      */ \
    396  /* 0x80 - 0x9F    Address of bytecode when character is matched           */ \
    397  /* 0xA0 - 0xBF    Address of bytecode when no match                       */ \
    398  V(SKIP_UNTIL_CHAR_AND, 54, 24)                                               \
    399  /* Combination of:                                                        */ \
    400  /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO                  */ \
    401  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    402  /* Bit Layout:                                                            */ \
    403  /* 0x00 - 0x07    0x37 (fixed) Bytecode                                   */ \
    404  /* 0x08 - 0x1F    Load character offset from current position             */ \
    405  /* 0x20 - 0x2F    Number of characters to advance                         */ \
    406  /* 0x30 - 0x3F    Character to match                                      */ \
    407  /* 0x40 - 0x5F    Address of bytecode when character is matched           */ \
    408  /* 0x60 - 0x7F    Address of bytecode when no match                       */ \
    409  V(SKIP_UNTIL_CHAR, 55, 16)                                                   \
    410  /* Combination of:                                                        */ \
    411  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR        */ \
    412  /* and ADVANCE_CP_AND_GOTO                                                */ \
    413  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    414  /* Bit Layout:                                                            */ \
    415  /* 0x00 - 0x07    0x38 (fixed) Bytecode                                   */ \
    416  /* 0x08 - 0x1F    Load character offset from current position             */ \
    417  /* 0x20 - 0x2F    Number of characters to advance                         */ \
    418  /* 0x30 - 0x3F    Character to match                                      */ \
    419  /* 0x40 - 0x5F    Minimum number of characters this pattern consumes      */ \
    420  /* 0x60 - 0x7F    Address of bytecode when character is matched           */ \
    421  /* 0x80 - 0x9F    Address of bytecode when no match                       */ \
    422  V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20)                                       \
    423  /* Combination of:                                                        */ \
    424  /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO      */ \
    425  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    426  /* Bit Layout:                                                            */ \
    427  /* 0x00 - 0x07    0x39 (fixed) Bytecode                                   */ \
    428  /* 0x08 - 0x1F    Load character offset from current position             */ \
    429  /* 0x20 - 0x3F    Number of characters to advance                         */ \
    430  /* 0x40 - 0x4F    Character to match                                      */ \
    431  /* 0x50 - 0x5F    Other Character to match                                */ \
    432  /* 0x60 - 0x7F    Address of bytecode when either character is matched    */ \
    433  /* 0x80 - 0x9F    Address of bytecode when no match                       */ \
    434  V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20)                                           \
    435  /* Combination of:                                                        */ \
    436  /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO                  */ \
    437  /* and ADVANCE_CP_AND_GOTO                                                */ \
    438  /* Emitted by RegExpBytecodePeepholeOptimization.                         */ \
    439  /* Bit Layout:                                                            */ \
    440  /* 0x00 - 0x07    0x3A (fixed) Bytecode                                   */ \
    441  /* 0x08 - 0x1F    Load character offset from current position             */ \
    442  /* 0x20 - 0x2F    Number of characters to advance                         */ \
    443  /* 0x30 - 0x3F    Character to check if it is less than current char      */ \
    444  /* 0x40 - 0xBF    Bit Table                                               */ \
    445  /* 0xC0 - 0xDF    Address of bytecode when character is matched           */ \
    446  /* 0xE0 - 0xFF    Address of bytecode when no match                       */ \
    447  V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32)
    448 
    449 #define ADD_ONE(...) +1
        // Number of bytecodes defined by the list, computed at compile time: every
        // application of the helper macro by BYTECODE_ITERATOR contributes a "+1",
        // and the terms are summed onto the leading 0.
    450 static constexpr int kRegExpBytecodeCount = 0 BYTECODE_ITERATOR(ADD_ONE);
    451 #undef ADD_ONE
    452 
    453 enum class RegExpBytecode : uint8_t {
         // Enumerators are generated from REGEXP_BYTECODE_LIST: each application of
         // DECLARE_BYTECODE emits k<CamelName> without an initializer, so the
         // values count upwards from 0 in list order.
    454 #define DECLARE_BYTECODE(CamelName, ...) k##CamelName,
    455  REGEXP_BYTECODE_LIST(DECLARE_BYTECODE)
    456 #undef DECLARE_BYTECODE
    457 #define COUNT_BYTECODE(x, ...) +1
    458  // The COUNT_BYTECODE macro will turn this into kLast = -1 +1 +1... which will
    459  // evaluate to the same value as the last real bytecode.
    460  kLast = -1 REGEXP_BYTECODE_LIST(COUNT_BYTECODE)
         // Drop the helper right after its only use, as is done for
         // DECLARE_BYTECODE above: this is a header, so a leftover macro would be
         // visible to every file that includes it.
        #undef COUNT_BYTECODE
    461 };
    462 
    463 template <RegExpBytecode bc>
        // Forward declaration only: a class template parameterized by a bytecode
        // value. No definition or specialization appears in this part of the file.
        // NOTE(review): presumably specialized per bytecode to describe its
        // operands -- confirm against the definition.
    464 class RegExpBytecodeOperands;
    465 
    466 class RegExpBytecodes final : public AllStatic {
        // Helpers for converting between RegExpBytecode enumerators and their raw
        // byte values, plus per-bytecode name/size queries. Every member declared
        // here is static.
    467 public:
         // Number of real bytecodes: kLast carries the value of the last real
         // bytecode, so the count is that value plus one.
    468  static constexpr int kCount = static_cast<uint8_t>(RegExpBytecode::kLast) + 1;
         // Returns the underlying uint8_t value of |bc|.
    469  static constexpr uint8_t ToByte(RegExpBytecode bc) {
    470    return static_cast<uint8_t>(bc);
    471  }
         // Converts a raw byte back to an enumerator. The DCHECK requires
         // |byte| < kCount.
    472  static constexpr RegExpBytecode FromByte(uint8_t byte) {
    473    DCHECK_LT(byte, kCount);
    474    return static_cast<RegExpBytecode>(byte);
    475  }
         // Name and Size are only declared here, each in an enumerator and a
         // raw-byte overload; their definitions are not in this part of the file.
         // NOTE(review): presumably the printable name and the encoded size of the
         // bytecode -- confirm against the definitions.
    476  static constexpr const char* Name(RegExpBytecode bytecode);
    477  static constexpr const char* Name(uint8_t bytecode);
    478 
    479  static constexpr uint8_t Size(RegExpBytecode bytecode);
    480  static constexpr uint8_t Size(uint8_t bytecode);
    481 };
    482 
    483 // Just making sure we assigned values above properly. They should be
    484 // contiguous, strictly increasing, and start at 0.
        // Note: the assertion below only pins the number of list entries (59); it
        // does not itself inspect the individual values assigned to them.
    485 // TODO(jgruber): Do not explicitly assign values, instead generate them
    486 // implicitly from the list order.
    487 static_assert(kRegExpBytecodeCount == 59);
    488 
    489 #define DEFINE_BC_CONSTANT(Name, Code, Length) \
    490  static constexpr int BC_##Name = Code;
        // Expands, through BYTECODE_ITERATOR, to one integer constant BC_<name>
        // per application, holding that entry's numeric code. The length
        // argument is accepted but not used here.
    491 BYTECODE_ITERATOR(DEFINE_BC_CONSTANT)
    492 #undef DEFINE_BC_CONSTANT
    493 
    494 static constexpr int kRegExpBytecodeLengths[] = {
         // One element per application of the helper macro by BYTECODE_ITERATOR,
         // in list order: the entry's third argument, i.e. its length. The list
         // comments suggest bytes (SKIP_UNTIL_CHAR: 128 bits of layout, length 16).
    495 #define EMIT_LENGTH(Name, Code, Length) Length,
    496    BYTECODE_ITERATOR(EMIT_LENGTH)
    497 #undef EMIT_LENGTH
    498 };
    499 
    500 inline constexpr int RegExpBytecodeLength(int bytecode) {
         // Looks up the length recorded for |bytecode| in kRegExpBytecodeLengths.
         // The id must lie in [0, kRegExpBytecodeCount); this is only DCHECKed.
    501  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
    502  return kRegExpBytecodeLengths[bytecode];
    503 }
    504 
    505 static constexpr const char* const kRegExpBytecodeNames[] = {
         // One string literal per application of the helper macro by
         // BYTECODE_ITERATOR, in list order: the entry's first argument, stringized.
    506 #define EMIT_NAME(Name, ...) #Name,
    507    BYTECODE_ITERATOR(EMIT_NAME)
    508 #undef EMIT_NAME
    509 };
    510 
    511 inline constexpr const char* RegExpBytecodeName(int bytecode) {
         // Looks up the name recorded for |bytecode| in kRegExpBytecodeNames.
         // The id must lie in [0, kRegExpBytecodeCount); this is only DCHECKed.
    512  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
    513  return kRegExpBytecodeNames[bytecode];
    514 }
    515 
    516 void RegExpBytecodeDisassembleSingle(const uint8_t* code_base,
    517                                     const uint8_t* pc);
        // Both disassembly entry points are only declared in this header; their
        // definitions, and therefore their output format and destination, are not
        // visible here.
        // NOTE(review): presumably the function above handles the one instruction
        // at |pc| and the one below handles |length| bytes starting at |code_base|
        // for the given |pattern| -- confirm against the implementation.
    518 void RegExpBytecodeDisassemble(const uint8_t* code_base, int length,
    519                               const char* pattern);
    520 
    521 }  // namespace internal
    522 }  // namespace v8
    523 
    524 #endif  // V8_REGEXP_REGEXP_BYTECODES_H_