// regexp-bytecodes.h
1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_BYTECODES_H_ 6 #define V8_REGEXP_REGEXP_BYTECODES_H_ 7 8 #include "irregexp/RegExpShim.h" 9 10 namespace v8 { 11 namespace internal { 12 13 // Maximum number of bytecodes that will be used (next power of 2 of actually 14 // defined bytecodes). 15 // All slots between the last actually defined bytecode and maximum id will be 16 // filled with BREAKs, indicating an invalid operation. This way using 17 // BYTECODE_MASK guarantees no OOB access to the dispatch table. 18 constexpr int kRegExpPaddedBytecodeCount = 1 << 6; 19 constexpr int BYTECODE_MASK = kRegExpPaddedBytecodeCount - 1; 20 // The first argument is packed in with the byte code in one word, but so it 21 // has 24 bits, but it can be positive and negative so only use 23 bits for 22 // positive values. 23 const unsigned int MAX_FIRST_ARG = 0x7fffffu; 24 const int BYTECODE_SHIFT = 8; 25 static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK); 26 27 // Basic operand types that have a direct mapping to a C-type. 28 // Getters/Setters for these are fully auto-generated. 29 // Format: V(Name, C type) 30 #define BASIC_BYTECODE_OPERAND_TYPE_LIST(V) \ 31 V(Int16, int16_t) \ 32 V(Int32, int32_t) \ 33 V(Uint32, uint32_t) \ 34 V(Char, base::uc16) \ 35 V(Label, uint32_t) \ 36 V(Offset, int16_t) \ 37 V(Register, uint16_t) 38 39 // Special operand types that don't have a direct mapping to a C-type. 40 // Getters/Setters for these types need to be specialized manually. 41 #define SPECIAL_BYTECODE_OPERAND_TYPE_LIST(V) \ 42 V(BitTable, 16) \ 43 V(Padding, 2) /* TODO(pthier): padding is only required for backwards \ 44 compatibility with the old layout. It can be removed after everything is \ 45 using the new layout. 
*/ 46 47 #define BYTECODE_OPERAND_TYPE_LIST(V) \ 48 BASIC_BYTECODE_OPERAND_TYPE_LIST(V) \ 49 SPECIAL_BYTECODE_OPERAND_TYPE_LIST(V) 50 51 enum class RegExpBytecodeOperandType : uint8_t { 52 #define DECLARE_OPERAND(Name, ...) k##Name, 53 BYTECODE_OPERAND_TYPE_LIST(DECLARE_OPERAND) 54 #undef DECLARE_OPERAND 55 }; 56 57 using ReBcOpType = RegExpBytecodeOperandType; 58 59 // Bytecodes that indicate something is invalid. These don't have a direct 60 // equivalent in RegExpMacroAssembler. 61 // It's a requirement that BREAK has an enum value of 0 (as e.g. jumps to offset 62 // 0 are considered invalid). 63 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), // (OperandTypes...)) 64 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode 65 // enum and bytecode layouts are compatible with the old one. Remove once all 66 // uses have been migrated. 67 #define INVALID_BYTECODE_LIST(V) V(Break, BREAK, (), ()) 68 69 // Basic Bytecodes. These have a direct equivalent in the RegExpMacroAssembler. 70 // Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...)) 71 // TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode 72 // enum and bytecode layouts are compatible with the old one. Remove once all 73 // uses have been migrated. 
// The order of the entries matters: RegExpBytecode enumerators are generated
// from this list in order, and BYTECODE_ITERATOR assigns the matching numeric
// codes explicitly, so both must be kept in sync.
#define BASIC_BYTECODE_LIST(V) \
  V(PushCurrentPosition, PUSH_CP, (), ()) \
  V(PushBacktrack, PUSH_BT, (on_bt_pushed), (ReBcOpType::kLabel)) \
  V(WriteCurrentPositionToRegister, SET_REGISTER_TO_CP, \
    (register_index, cp_offset), (ReBcOpType::kRegister, ReBcOpType::kOffset)) \
  V(ReadCurrentPositionFromRegister, SET_CP_TO_REGISTER, (register_index), \
    (ReBcOpType::kRegister)) \
  V(WriteStackPointerToRegister, SET_REGISTER_TO_SP, (register_index), \
    (ReBcOpType::kRegister)) \
  V(ReadStackPointerFromRegister, SET_SP_TO_REGISTER, (register_index), \
    (ReBcOpType::kRegister)) \
  V(SetRegister, SET_REGISTER, (register_index, value), \
    (ReBcOpType::kRegister, ReBcOpType::kInt32)) \
  V(AdvanceRegister, ADVANCE_REGISTER, (register_index, by), \
    (ReBcOpType::kRegister, ReBcOpType::kOffset)) \
  V(PopCurrentPosition, POP_CP, (), ()) \
  V(PopBacktrack, POP_BT, (return_code), (ReBcOpType::kInt16)) \
  V(PopRegister, POP_REGISTER, (register_index), (ReBcOpType::kRegister)) \
  V(Fail, FAIL, (), ()) \
  V(Succeed, SUCCEED, (), ()) \
  V(AdvanceCurrentPosition, ADVANCE_CP, (by), (ReBcOpType::kOffset)) \
  /* Jump to another bytecode given its offset. */ \
  V(GoTo, GOTO, (label), (ReBcOpType::kLabel)) \
  /* Check if offset is in range and load character at given offset. */ \
  V(LoadCurrentCharacter, LOAD_CURRENT_CHAR, (cp_offset, on_failure), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  /* Check if current character is equal to a given character. */ \
  V(CheckCharacter, CHECK_CHAR, (character, on_equal), \
    (ReBcOpType::kChar, ReBcOpType::kLabel)) \
  V(CheckNotCharacter, CHECK_NOT_CHAR, (character, on_not_equal), \
    (ReBcOpType::kChar, ReBcOpType::kLabel)) \
  /* Checks if the current character combined with mask (bitwise and) */ \
  /* matches a character (e.g. used when two characters in a disjunction */ \
  /* differ by only a single bit). */ \
  V(CheckCharacterAfterAnd, AND_CHECK_CHAR, (character, mask, on_equal), \
    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel)) \
  V(CheckNotCharacterAfterAnd, AND_CHECK_NOT_CHAR, \
    (character, mask, on_not_equal), \
    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel)) \
  V(CheckNotCharacterAfterMinusAnd, MINUS_AND_CHECK_NOT_CHAR, \
    (character, minus, mask, on_not_equal), \
    (ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kChar, \
     ReBcOpType::kLabel)) \
  V(CheckCharacterInRange, CHECK_CHAR_IN_RANGE, \
    (padding, from, to, on_in_range), \
    (ReBcOpType::kPadding, ReBcOpType::kChar, ReBcOpType::kChar, \
     ReBcOpType::kLabel)) \
  V(CheckCharacterNotInRange, CHECK_CHAR_NOT_IN_RANGE, \
    (padding, from, to, on_not_in_range), \
    (ReBcOpType::kPadding, ReBcOpType::kChar, ReBcOpType::kChar, \
     ReBcOpType::kLabel)) \
  V(CheckLt, CHECK_LT, (limit, on_less), \
    (ReBcOpType::kChar, ReBcOpType::kLabel)) \
  V(CheckGt, CHECK_GT, (limit, on_greater), \
    (ReBcOpType::kChar, ReBcOpType::kLabel)) \
  /* TODO(pthier): CheckNotBackRef variants could be merged into a single */ \
  /* Bytecode without increasing the size */ \
  V(CheckNotBackRef, CHECK_NOT_BACK_REF, (start_reg, on_not_equal), \
    (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotBackRefNoCase, CHECK_NOT_BACK_REF_NO_CASE, \
    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotBackRefNoCaseUnicode, CHECK_NOT_BACK_REF_NO_CASE_UNICODE, \
    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotBackRefBackward, CHECK_NOT_BACK_REF_BACKWARD, \
    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotBackRefNoCaseBackward, CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, \
    (start_reg, on_not_equal), (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotBackRefNoCaseUnicodeBackward, \
    CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, (start_reg, on_not_equal), \
    (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckNotRegsEqual, CHECK_NOT_REGS_EQUAL, (reg1, reg2, on_not_equal), \
    (ReBcOpType::kRegister, ReBcOpType::kRegister, \
     ReBcOpType::kLabel)) /* TODO(pthier): This Bytecode is unused. */ \
  V(IfRegisterLT, CHECK_REGISTER_LT, \
    (register_index, comparand, on_less_than), \
    (ReBcOpType::kRegister, ReBcOpType::kInt32, ReBcOpType::kLabel)) \
  V(IfRegisterGE, CHECK_REGISTER_GE, \
    (register_index, comparand, on_greater_or_equal), \
    (ReBcOpType::kRegister, ReBcOpType::kInt32, ReBcOpType::kLabel)) \
  V(IfRegisterEqPos, CHECK_REGISTER_EQ_POS, (register_index, on_eq), \
    (ReBcOpType::kRegister, ReBcOpType::kLabel)) \
  V(CheckAtStart, CHECK_AT_START, (cp_offset, on_at_start), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  V(CheckNotAtStart, CHECK_NOT_AT_START, (cp_offset, on_not_at_start), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  /* Checks if the current position matches top of backtrack stack */ \
  V(CheckFixedLengthLoop, CHECK_FIXED_LENGTH, \
    (on_tos_equals_current_position), (ReBcOpType::kLabel)) \
  V(SetCurrentPositionFromEnd, SET_CURRENT_POSITION_FROM_END, (by), \
    (ReBcOpType::kOffset))

// Bytecodes dealing with multiple characters, introduced due to special logic
// in the bytecode-generator or requiring additional logic when assembling.
// These share a method with Basic Bytecodes in RegExpMacroAssembler.
// Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
// TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
// enum and bytecode layouts are compatible with the old one. Remove once all
// uses have been migrated.
// The order of the entries matters: RegExpBytecode enumerators are generated
// from REGEXP_BYTECODE_LIST in order, so entries here continue the numbering
// after the basic bytecodes.
#define SPECIAL_BYTECODE_LIST(V) \
  V(PushRegister, PUSH_REGISTER, (register_index), (ReBcOpType::kRegister)) \
  /* Load character at given offset without range checks. */ \
  V(LoadCurrentCharacterUnchecked, LOAD_CURRENT_CHAR_UNCHECKED, (cp_offset), \
    (ReBcOpType::kOffset)) \
  /* Checks if the current character matches any of the characters encoded */ \
  /* in a bit table. Similar to/inspired by boyer moore string search */ \
  /* TODO(pthier): Change order to (table, label) and move to Basic */ \
  V(CheckBitInTable, CHECK_BIT_IN_TABLE, (on_bit_set, table), \
    (ReBcOpType::kLabel, ReBcOpType::kBitTable)) \
  V(Load2CurrentChars, LOAD_2_CURRENT_CHARS, (cp_offset, on_failure), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  V(Load2CurrentCharsUnchecked, LOAD_2_CURRENT_CHARS_UNCHECKED, (cp_offset), \
    (ReBcOpType::kOffset)) \
  V(Load4CurrentChars, LOAD_4_CURRENT_CHARS, (cp_offset, on_failure), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  V(Load4CurrentCharsUnchecked, LOAD_4_CURRENT_CHARS_UNCHECKED, (cp_offset), \
    (ReBcOpType::kOffset)) \
  V(Check4Chars, CHECK_4_CHARS, (characters, on_equal), \
    (ReBcOpType::kUint32, ReBcOpType::kLabel)) \
  V(CheckNot4Chars, CHECK_NOT_4_CHARS, (characters, on_not_equal), \
    (ReBcOpType::kUint32, ReBcOpType::kLabel)) \
  V(AndCheck4Chars, AND_CHECK_4_CHARS, (characters, mask, on_equal), \
    (ReBcOpType::kUint32, ReBcOpType::kUint32, ReBcOpType::kLabel)) \
  V(AndCheckNot4Chars, AND_CHECK_NOT_4_CHARS, \
    (characters, mask, on_not_equal), \
    (ReBcOpType::kUint32, ReBcOpType::kUint32, ReBcOpType::kLabel)) \
  /* Advance character pointer by given offset and jump to another bytecode.*/ \
  V(AdvanceCpAndGoto, ADVANCE_CP_AND_GOTO, (by, on_goto), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel)) \
  /* Checks if current position + given offset is in range. */ \
  V(CheckCurrentPosition, CHECK_CURRENT_POSITION, (cp_offset, on_failure), \
    (ReBcOpType::kOffset, ReBcOpType::kLabel))

// Bytecodes generated by peephole optimization. These don't have a direct
// equivalent in the RegExpMacroAssembler.
// Format: V(CamelName, SNAKE_NAME, (OperandNames...), (OperandTypes...))
// TODO(pthier): SNAKE_NAME is temporary to static_assert that the new bytecode
// enum and bytecode layouts are compatible with the old one. Remove once all
// uses have been migrated.
#define PEEPHOLE_BYTECODE_LIST(V) \
  /* Combination of: */ \
  /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilBitInTable, SKIP_UNTIL_BIT_IN_TABLE, \
    (cp_offset, advance_by, table, on_match, on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kBitTable, \
     ReBcOpType::kLabel, ReBcOpType::kLabel)) \
  /* Combination of: */ \
  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \
  /* and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilCharAnd, SKIP_UNTIL_CHAR_AND, \
    (cp_offset, advance_by, character, mask, eats_at_least, on_match, \
     on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar, \
     ReBcOpType::kChar, ReBcOpType::kUint32, ReBcOpType::kLabel, \
     ReBcOpType::kLabel)) /* TODO(pthier): eats_at_least should be Offset */ \
  /* Combination of: */ \
  /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilChar, SKIP_UNTIL_CHAR, \
    (cp_offset, advance_by, character, on_match, on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar, \
     ReBcOpType::kLabel, ReBcOpType::kLabel)) \
  /* Combination of: */ \
  /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \
  /* and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilCharPosChecked, SKIP_UNTIL_CHAR_POS_CHECKED, \
    (cp_offset, advance_by, character, eats_at_least, on_match, on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar, \
     ReBcOpType::kUint32, ReBcOpType::kLabel, ReBcOpType::kLabel)) \
  /* TODO(pthier): eats_at_least should be Offset instead of Uint32 */ \
  /* Combination of: */ \
  /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilCharOrChar, SKIP_UNTIL_CHAR_OR_CHAR, \
    (cp_offset, advance_by, padding, char1, char2, on_match, on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kPadding, \
     ReBcOpType::kChar, ReBcOpType::kChar, ReBcOpType::kLabel, \
     ReBcOpType::kLabel)) \
  /* Combination of: */ \
  /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO */ \
  /* and ADVANCE_CP_AND_GOTO */ \
  V(SkipUntilGtOrNotBitInTable, SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, \
    (cp_offset, advance_by, character, table, on_match, on_no_match), \
    (ReBcOpType::kOffset, ReBcOpType::kOffset, ReBcOpType::kChar, \
     ReBcOpType::kBitTable, ReBcOpType::kLabel, ReBcOpType::kLabel))

// All bytecodes: the invalid marker first (so that Break gets value 0),
// followed by the basic, special and peephole bytecodes.
#define REGEXP_BYTECODE_LIST(V) \
  INVALID_BYTECODE_LIST(V)      \
  BASIC_BYTECODE_LIST(V)        \
  SPECIAL_BYTECODE_LIST(V)      \
  PEEPHOLE_BYTECODE_LIST(V)

// The list of bytecodes, in format: V(Name, Code, ByteLength).
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
// name or at least by position.
// TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32).
267 #define BYTECODE_ITERATOR(V) \ 268 V(BREAK, 0, 4) /* bc8 */ \ 269 V(PUSH_CP, 1, 4) /* bc8 pad24 */ \ 270 V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \ 271 V(SET_REGISTER_TO_CP, 3, 8) /* bc8 reg_idx24 offset32 */ \ 272 V(SET_CP_TO_REGISTER, 4, 4) /* bc8 reg_idx24 */ \ 273 V(SET_REGISTER_TO_SP, 5, 4) /* bc8 reg_idx24 */ \ 274 V(SET_SP_TO_REGISTER, 6, 4) /* bc8 reg_idx24 */ \ 275 V(SET_REGISTER, 7, 8) /* bc8 reg_idx24 value32 */ \ 276 V(ADVANCE_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \ 277 V(POP_CP, 9, 4) /* bc8 pad24 */ \ 278 V(POP_BT, 10, 4) /* bc8 pad24 */ \ 279 V(POP_REGISTER, 11, 4) /* bc8 reg_idx24 */ \ 280 V(FAIL, 12, 4) /* bc8 pad24 */ \ 281 V(SUCCEED, 13, 4) /* bc8 pad24 */ \ 282 V(ADVANCE_CP, 14, 4) /* bc8 offset24 */ \ 283 /* Jump to another bytecode given its offset. */ \ 284 /* Bit Layout: */ \ 285 /* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \ 286 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 287 /* 0x20 - 0x3F: Address of bytecode to jump to */ \ 288 V(GOTO, 15, 8) /* bc8 pad24 addr32 */ \ 289 /* Check if offset is in range and load character at given offset. */ \ 290 /* Bit Layout: */ \ 291 /* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \ 292 /* 0x08 - 0x1F: Offset from current position */ \ 293 /* 0x20 - 0x3F: Address of bytecode when load is out of range */ \ 294 V(LOAD_CURRENT_CHAR, 16, 8) /* bc8 offset24 addr32 */ \ 295 /* Check if current character is equal to a given character */ \ 296 /* Bit Layout: */ \ 297 /* 0x00 - 0x07: 0x19 (fixed) Bytecode */ \ 298 /* 0x08 - 0x0F: 0x00 (unused) Padding */ \ 299 /* 0x10 - 0x1F: Character to check */ \ 300 /* 0x20 - 0x3F: Address of bytecode when matched */ \ 301 V(CHECK_CHAR, 17, 8) /* bc8 pad8 uint16 addr32 */ \ 302 V(CHECK_NOT_CHAR, 18, 8) /* bc8 pad8 uint16 addr32 */ \ 303 /* Checks if the current character combined with mask (bitwise and) */ \ 304 /* matches a character (e.g. 
used when two characters in a disjunction */ \ 305 /* differ by only a single bit */ \ 306 /* Bit Layout: */ \ 307 /* 0x00 - 0x07: 0x1c (fixed) Bytecode */ \ 308 /* 0x08 - 0x0F: 0x00 (unused) Padding */ \ 309 /* 0x10 - 0x1F: Character to match against (after mask aplied) */ \ 310 /* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \ 311 /* 0x40 - 0x5F: Address of bytecode when matched */ \ 312 V(AND_CHECK_CHAR, 19, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ 313 V(AND_CHECK_NOT_CHAR, 20, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ 314 V(MINUS_AND_CHECK_NOT_CHAR, 21, \ 315 12) /* bc8 pad8 base::uc16 base::uc16 base::uc16 addr32 */ \ 316 V(CHECK_CHAR_IN_RANGE, 22, 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ 317 V(CHECK_CHAR_NOT_IN_RANGE, 23, \ 318 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ 319 V(CHECK_LT, 24, 8) /* bc8 pad8 base::uc16 addr32 */ \ 320 V(CHECK_GT, 25, 8) /* bc8 pad8 base::uc16 addr32 */ \ 321 V(CHECK_NOT_BACK_REF, 26, 8) /* bc8 reg_idx24 addr32 */ \ 322 V(CHECK_NOT_BACK_REF_NO_CASE, 27, 8) /* bc8 reg_idx24 addr32 */ \ 323 V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 28, 8) \ 324 V(CHECK_NOT_BACK_REF_BACKWARD, 29, 8) /* bc8 reg_idx24 addr32 */ \ 325 V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 30, 8) /* bc8 reg_idx24 addr32 */ \ 326 V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 31, 8) \ 327 V(CHECK_NOT_REGS_EQUAL, 32, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ 328 V(CHECK_REGISTER_LT, 33, 12) /* bc8 reg_idx24 value32 addr32 */ \ 329 V(CHECK_REGISTER_GE, 34, 12) /* bc8 reg_idx24 value32 addr32 */ \ 330 V(CHECK_REGISTER_EQ_POS, 35, 8) /* bc8 reg_idx24 addr32 */ \ 331 V(CHECK_AT_START, 36, 8) /* bc8 pad24 addr32 */ \ 332 V(CHECK_NOT_AT_START, 37, 8) /* bc8 offset24 addr32 */ \ 333 /* Checks if the current position matches top of backtrack stack */ \ 334 /* Bit Layout: */ \ 335 /* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \ 336 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 337 /* 0x20 - 0x3F: Address of bytecode when current matches tos */ \ 338 
V(CHECK_FIXED_LENGTH, 38, 8) /* bc8 pad24 addr32 */ \ 339 /* Advance character pointer by given offset and jump to another bytecode.*/ \ 340 /* Bit Layout: */ \ 341 /* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \ 342 /* 0x08 - 0x1F: Number of characters to advance */ \ 343 /* 0x20 - 0x3F: Address of bytecode to jump to */ \ 344 V(SET_CURRENT_POSITION_FROM_END, 39, 4) /* bc8 idx24 */ \ 345 V(PUSH_REGISTER, 40, 4) /* bc8 reg_idx24 */ \ 346 /* Load character at given offset without range checks. */ \ 347 /* Bit Layout: */ \ 348 /* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \ 349 /* 0x08 - 0x1F: Offset from current position */ \ 350 V(LOAD_CURRENT_CHAR_UNCHECKED, 41, 4) /* bc8 offset24 */ \ 351 /* Checks if the current character matches any of the characters encoded */ \ 352 /* in a bit table. Similar to/inspired by boyer moore string search */ \ 353 /* Bit Layout: */ \ 354 /* 0x00 - 0x07: 0x22 (fixed) Bytecode */ \ 355 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 356 /* 0x20 - 0x3F: Address of bytecode when bit is set */ \ 357 /* 0x40 - 0xBF: Bit table */ \ 358 V(CHECK_BIT_IN_TABLE, 42, 24) /* bc8 pad24 addr32 bits128 */ \ 359 V(LOAD_2_CURRENT_CHARS, 43, 8) /* bc8 offset24 addr32 */ \ 360 V(LOAD_2_CURRENT_CHARS_UNCHECKED, 44, 4) /* bc8 offset24 */ \ 361 V(LOAD_4_CURRENT_CHARS, 45, 8) /* bc8 offset24 addr32 */ \ 362 V(LOAD_4_CURRENT_CHARS_UNCHECKED, 46, 4) /* bc8 offset24 */ \ 363 V(CHECK_4_CHARS, 47, 12) /* bc8 pad24 uint32 addr32 */ \ 364 V(CHECK_NOT_4_CHARS, 48, 12) /* bc8 pad24 uint32 addr32 */ \ 365 V(AND_CHECK_4_CHARS, 49, 16) /* bc8 pad24 uint32 uint32 addr32*/ \ 366 V(AND_CHECK_NOT_4_CHARS, 50, 16) /* bc8 pad24 uint32 uint32 addr32*/ \ 367 V(ADVANCE_CP_AND_GOTO, 51, 8) /* bc8 offset24 addr32 */ \ 368 /* Checks if current position + given offset is in range. 
*/ \ 369 /* Bit Layout: */ \ 370 /* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \ 371 /* 0x08 - 0x1F: Offset from current position */ \ 372 /* 0x20 - 0x3F: Address of bytecode when position is out of range */ \ 373 V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \ 374 /* Combination of: */ \ 375 /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \ 376 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 377 /* Bit Layout: */ \ 378 /* 0x00 - 0x07 0x35 (fixed) Bytecode */ \ 379 /* 0x08 - 0x1F Load character offset from current position */ \ 380 /* 0x20 - 0x3F Number of characters to advance */ \ 381 /* 0x40 - 0xBF Bit Table */ \ 382 /* 0xC0 - 0xDF Address of bytecode when character is matched */ \ 383 /* 0xE0 - 0xFF Address of bytecode when no match */ \ 384 V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32) \ 385 /* Combination of: */ \ 386 /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \ 387 /* and ADVANCE_CP_AND_GOTO */ \ 388 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 389 /* Bit Layout: */ \ 390 /* 0x00 - 0x07 0x36 (fixed) Bytecode */ \ 391 /* 0x08 - 0x1F Load character offset from current position */ \ 392 /* 0x20 - 0x2F Number of characters to advance */ \ 393 /* 0x30 - 0x3F Character to match against (after mask applied) */ \ 394 /* 0x40 - 0x5F: Bitmask bitwise and combined with current character */ \ 395 /* 0x60 - 0x7F Minimum number of characters this pattern consumes */ \ 396 /* 0x80 - 0x9F Address of bytecode when character is matched */ \ 397 /* 0xA0 - 0xBF Address of bytecode when no match */ \ 398 V(SKIP_UNTIL_CHAR_AND, 54, 24) \ 399 /* Combination of: */ \ 400 /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \ 401 /* Emitted by RegExpBytecodePeepholeOptimization. 
*/ \ 402 /* Bit Layout: */ \ 403 /* 0x00 - 0x07 0x37 (fixed) Bytecode */ \ 404 /* 0x08 - 0x1F Load character offset from current position */ \ 405 /* 0x20 - 0x2F Number of characters to advance */ \ 406 /* 0x30 - 0x3F Character to match */ \ 407 /* 0x40 - 0x5F Address of bytecode when character is matched */ \ 408 /* 0x60 - 0x7F Address of bytecode when no match */ \ 409 V(SKIP_UNTIL_CHAR, 55, 16) \ 410 /* Combination of: */ \ 411 /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \ 412 /* and ADVANCE_CP_AND_GOTO */ \ 413 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 414 /* Bit Layout: */ \ 415 /* 0x00 - 0x07 0x38 (fixed) Bytecode */ \ 416 /* 0x08 - 0x1F Load character offset from current position */ \ 417 /* 0x20 - 0x2F Number of characters to advance */ \ 418 /* 0x30 - 0x3F Character to match */ \ 419 /* 0x40 - 0x5F Minimum number of characters this pattern consumes */ \ 420 /* 0x60 - 0x7F Address of bytecode when character is matched */ \ 421 /* 0x80 - 0x9F Address of bytecode when no match */ \ 422 V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20) \ 423 /* Combination of: */ \ 424 /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \ 425 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 426 /* Bit Layout: */ \ 427 /* 0x00 - 0x07 0x39 (fixed) Bytecode */ \ 428 /* 0x08 - 0x1F Load character offset from current position */ \ 429 /* 0x20 - 0x3F Number of characters to advance */ \ 430 /* 0x40 - 0x4F Character to match */ \ 431 /* 0x50 - 0x5F Other Character to match */ \ 432 /* 0x60 - 0x7F Address of bytecode when either character is matched */ \ 433 /* 0x80 - 0x9F Address of bytecode when no match */ \ 434 V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20) \ 435 /* Combination of: */ \ 436 /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and */ \ 437 /* and ADVANCE_CP_AND_GOTO */ \ 438 /* Emitted by RegExpBytecodePeepholeOptimization. 
*/ \ 439 /* Bit Layout: */ \ 440 /* 0x00 - 0x07 0x3A (fixed) Bytecode */ \ 441 /* 0x08 - 0x1F Load character offset from current position */ \ 442 /* 0x20 - 0x2F Number of characters to advance */ \ 443 /* 0x30 - 0x3F Character to check if it is less than current char */ \ 444 /* 0x40 - 0xBF Bit Table */ \ 445 /* 0xC0 - 0xDF Address of bytecode when character is matched */ \ 446 /* 0xE0 - 0xFF Address of bytecode when no match */ \ 447 V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32) 448 449 #define COUNT(...) +1 450 static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT); 451 #undef COUNT 452 453 enum class RegExpBytecode : uint8_t { 454 #define DECLARE_BYTECODE(CamelName, ...) k##CamelName, 455 REGEXP_BYTECODE_LIST(DECLARE_BYTECODE) 456 #undef DECLARE_BYTECODE 457 #define COUNT_BYTECODE(x, ...) +1 458 // The COUNT_BYTECODE macro will turn this into kLast = -1 +1 +1... which will 459 // evaluate to the same value as the last real bytecode. 460 kLast = -1 REGEXP_BYTECODE_LIST(COUNT_BYTECODE) 461 }; 462 463 template <RegExpBytecode bc> 464 class RegExpBytecodeOperands; 465 466 class RegExpBytecodes final : public AllStatic { 467 public: 468 static constexpr int kCount = static_cast<uint8_t>(RegExpBytecode::kLast) + 1; 469 static constexpr uint8_t ToByte(RegExpBytecode bc) { 470 return static_cast<uint8_t>(bc); 471 } 472 static constexpr RegExpBytecode FromByte(uint8_t byte) { 473 DCHECK_LT(byte, kCount); 474 return static_cast<RegExpBytecode>(byte); 475 } 476 static constexpr const char* Name(RegExpBytecode bytecode); 477 static constexpr const char* Name(uint8_t bytecode); 478 479 static constexpr uint8_t Size(RegExpBytecode bytecode); 480 static constexpr uint8_t Size(uint8_t bytecode); 481 }; 482 483 // Just making sure we assigned values above properly. They should be 484 // contiguous, strictly increasing, and start at 0. 485 // TODO(jgruber): Do not explicitly assign values, instead generate them 486 // implicitly from the list order. 
487 static_assert(kRegExpBytecodeCount == 59); 488 489 #define DECLARE_BYTECODES(name, code, length) \ 490 static constexpr int BC_##name = code; 491 BYTECODE_ITERATOR(DECLARE_BYTECODES) 492 #undef DECLARE_BYTECODES 493 494 static constexpr int kRegExpBytecodeLengths[] = { 495 #define DECLARE_BYTECODE_LENGTH(name, code, length) length, 496 BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH) 497 #undef DECLARE_BYTECODE_LENGTH 498 }; 499 500 inline constexpr int RegExpBytecodeLength(int bytecode) { 501 DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1)); 502 return kRegExpBytecodeLengths[bytecode]; 503 } 504 505 static constexpr const char* const kRegExpBytecodeNames[] = { 506 #define DECLARE_BYTECODE_NAME(name, ...) #name, 507 BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME) 508 #undef DECLARE_BYTECODE_NAME 509 }; 510 511 inline constexpr const char* RegExpBytecodeName(int bytecode) { 512 DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1)); 513 return kRegExpBytecodeNames[bytecode]; 514 } 515 516 void RegExpBytecodeDisassembleSingle(const uint8_t* code_base, 517 const uint8_t* pc); 518 void RegExpBytecodeDisassemble(const uint8_t* code_base, int length, 519 const char* pattern); 520 521 } // namespace internal 522 } // namespace v8 523 524 #endif // V8_REGEXP_REGEXP_BYTECODES_H_