tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regexp-bytecode-generator.cc (13197B)


      1 // Copyright 2008-2009 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "irregexp/imported/regexp-bytecode-generator.h"
      6 
      7 #include "irregexp/imported/regexp-bytecode-generator-inl.h"
      8 #include "irregexp/imported/regexp-bytecode-peephole.h"
      9 #include "irregexp/imported/regexp-bytecodes.h"
     10 #include "irregexp/imported/regexp-macro-assembler.h"
     11 
     12 namespace v8 {
     13 namespace internal {
     14 
     15 RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
     16    : RegExpMacroAssembler(isolate, zone),
     17      buffer_(kInitialBufferSize, zone),
     18      pc_(0),
     19      advance_current_end_(kInvalidPC),
     20      jump_edges_(zone),
     21      isolate_(isolate) {}
     22 
     23 RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
     24  if (backtrack_.is_linked()) backtrack_.Unuse();
     25 }
     26 
     27 RegExpBytecodeGenerator::IrregexpImplementation
     28 RegExpBytecodeGenerator::Implementation() {
     29  return kBytecodeImplementation;
     30 }
     31 
     32 void RegExpBytecodeGenerator::Bind(Label* l) {
     33  advance_current_end_ = kInvalidPC;
     34  DCHECK(!l->is_bound());
     35  if (l->is_linked()) {
     36    int pos = l->pos();
     37    while (pos != 0) {
     38      int fixup = pos;
     39      pos = *reinterpret_cast<int32_t*>(buffer_.data() + fixup);
     40      *reinterpret_cast<uint32_t*>(buffer_.data() + fixup) = pc_;
     41      jump_edges_.emplace(fixup, pc_);
     42    }
     43  }
     44  l->bind_to(pc_);
     45 }
     46 
     47 void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
     48  if (l == nullptr) l = &backtrack_;
     49  int pos = 0;
     50  if (l->is_bound()) {
     51    pos = l->pos();
     52    jump_edges_.emplace(pc_, pos);
     53  } else {
     54    if (l->is_linked()) {
     55      pos = l->pos();
     56    }
     57    l->link_to(pc_);
     58  }
     59  Emit32(pos);
     60 }
     61 
     62 void RegExpBytecodeGenerator::PopRegister(int register_index) {
     63  DCHECK_LE(0, register_index);
     64  DCHECK_GE(kMaxRegister, register_index);
     65  Emit(BC_POP_REGISTER, register_index);
     66 }
     67 
     68 void RegExpBytecodeGenerator::PushRegister(int register_index,
     69                                           StackCheckFlag check_stack_limit) {
     70  DCHECK_LE(0, register_index);
     71  DCHECK_GE(kMaxRegister, register_index);
     72  Emit(BC_PUSH_REGISTER, register_index);
     73 }
     74 
     75 void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index,
     76                                                             int cp_offset) {
     77  DCHECK_LE(0, register_index);
     78  DCHECK_GE(kMaxRegister, register_index);
     79  Emit(BC_SET_REGISTER_TO_CP, register_index);
     80  Emit32(cp_offset);  // Current position offset.
     81 }
     82 
     83 void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) {
     84  DCHECK(reg_from <= reg_to);
     85  for (int reg = reg_from; reg <= reg_to; reg++) {
     86    SetRegister(reg, -1);
     87  }
     88 }
     89 
     90 void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister(
     91    int register_index) {
     92  DCHECK_LE(0, register_index);
     93  DCHECK_GE(kMaxRegister, register_index);
     94  Emit(BC_SET_CP_TO_REGISTER, register_index);
     95 }
     96 
     97 void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) {
     98  DCHECK_LE(0, register_index);
     99  DCHECK_GE(kMaxRegister, register_index);
    100  Emit(BC_SET_REGISTER_TO_SP, register_index);
    101 }
    102 
    103 void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) {
    104  DCHECK_LE(0, register_index);
    105  DCHECK_GE(kMaxRegister, register_index);
    106  Emit(BC_SET_SP_TO_REGISTER, register_index);
    107 }
    108 
    109 void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) {
    110  DCHECK(is_uint24(by));
    111  Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
    112 }
    113 
    114 void RegExpBytecodeGenerator::SetRegister(int register_index, int to) {
    115  DCHECK_LE(0, register_index);
    116  DCHECK_GE(kMaxRegister, register_index);
    117  Emit(BC_SET_REGISTER, register_index);
    118  Emit32(to);
    119 }
    120 
    121 void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) {
    122  DCHECK_LE(0, register_index);
    123  DCHECK_GE(kMaxRegister, register_index);
    124  Emit(BC_ADVANCE_REGISTER, register_index);
    125  Emit32(by);
    126 }
    127 
    128 void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); }
    129 
    130 void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); }
    131 
    132 void RegExpBytecodeGenerator::Backtrack() {
    133  int error_code =
    134      can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE;
    135  Emit(BC_POP_BT, error_code);
    136 }
    137 
    138 void RegExpBytecodeGenerator::GoTo(Label* l) {
    139  if (advance_current_end_ == pc_) {
    140    // Combine advance current and goto.
    141    pc_ = advance_current_start_;
    142    Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
    143    EmitOrLink(l);
    144    advance_current_end_ = kInvalidPC;
    145  } else {
    146    // Regular goto.
    147    Emit(BC_GOTO, 0);
    148    EmitOrLink(l);
    149  }
    150 }
    151 
    152 void RegExpBytecodeGenerator::PushBacktrack(Label* l) {
    153  Emit(BC_PUSH_BT, 0);
    154  EmitOrLink(l);
    155 }
    156 
    157 bool RegExpBytecodeGenerator::Succeed() {
    158  Emit(BC_SUCCEED, 0);
    159  return false;  // Restart matching for global regexp not supported.
    160 }
    161 
    162 void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); }
    163 
    164 void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) {
    165  // TODO(chromium:1166138): Turn back into DCHECKs once the underlying issue
    166  // is fixed.
    167  CHECK_LE(kMinCPOffset, by);
    168  CHECK_GE(kMaxCPOffset, by);
    169  advance_current_start_ = pc_;
    170  advance_current_offset_ = by;
    171  Emit(BC_ADVANCE_CP, by);
    172  advance_current_end_ = pc_;
    173 }
    174 
    175 void RegExpBytecodeGenerator::CheckFixedLengthLoop(
    176    Label* on_tos_equals_current_position) {
    177  Emit(BC_CHECK_FIXED_LENGTH, 0);
    178  EmitOrLink(on_tos_equals_current_position);
    179 }
    180 
    181 void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
    182                                                       Label* on_failure,
    183                                                       bool check_bounds,
    184                                                       int characters,
    185                                                       int eats_at_least) {
    186  DCHECK_GE(eats_at_least, characters);
    187  if (eats_at_least > characters && check_bounds) {
    188    DCHECK(is_int24(cp_offset + eats_at_least));
    189    Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
    190    EmitOrLink(on_failure);
    191    check_bounds = false;  // Load below doesn't need to check.
    192  }
    193 
    194  CHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset));
    195  int bytecode;
    196  if (check_bounds) {
    197    if (characters == 4) {
    198      bytecode = BC_LOAD_4_CURRENT_CHARS;
    199    } else if (characters == 2) {
    200      bytecode = BC_LOAD_2_CURRENT_CHARS;
    201    } else {
    202      DCHECK_EQ(1, characters);
    203      bytecode = BC_LOAD_CURRENT_CHAR;
    204    }
    205  } else {
    206    if (characters == 4) {
    207      bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
    208    } else if (characters == 2) {
    209      bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
    210    } else {
    211      DCHECK_EQ(1, characters);
    212      bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
    213    }
    214  }
    215  Emit(bytecode, cp_offset);
    216  if (check_bounds) EmitOrLink(on_failure);
    217 }
    218 
    219 void RegExpBytecodeGenerator::CheckCharacterLT(base::uc16 limit,
    220                                               Label* on_less) {
    221  Emit(BC_CHECK_LT, limit);
    222  EmitOrLink(on_less);
    223 }
    224 
    225 void RegExpBytecodeGenerator::CheckCharacterGT(base::uc16 limit,
    226                                               Label* on_greater) {
    227  Emit(BC_CHECK_GT, limit);
    228  EmitOrLink(on_greater);
    229 }
    230 
    231 void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) {
    232  if (c > MAX_FIRST_ARG) {
    233    Emit(BC_CHECK_4_CHARS, 0);
    234    Emit32(c);
    235  } else {
    236    Emit(BC_CHECK_CHAR, c);
    237  }
    238  EmitOrLink(on_equal);
    239 }
    240 
    241 void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) {
    242  Emit(BC_CHECK_AT_START, cp_offset);
    243  EmitOrLink(on_at_start);
    244 }
    245 
    246 void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset,
    247                                              Label* on_not_at_start) {
    248  Emit(BC_CHECK_NOT_AT_START, cp_offset);
    249  EmitOrLink(on_not_at_start);
    250 }
    251 
    252 void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c,
    253                                                Label* on_not_equal) {
    254  if (c > MAX_FIRST_ARG) {
    255    Emit(BC_CHECK_NOT_4_CHARS, 0);
    256    Emit32(c);
    257  } else {
    258    Emit(BC_CHECK_NOT_CHAR, c);
    259  }
    260  EmitOrLink(on_not_equal);
    261 }
    262 
    263 void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
    264                                                     Label* on_equal) {
    265  if (c > MAX_FIRST_ARG) {
    266    Emit(BC_AND_CHECK_4_CHARS, 0);
    267    Emit32(c);
    268  } else {
    269    Emit(BC_AND_CHECK_CHAR, c);
    270  }
    271  Emit32(mask);
    272  EmitOrLink(on_equal);
    273 }
    274 
    275 void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c,
    276                                                        uint32_t mask,
    277                                                        Label* on_not_equal) {
    278  if (c > MAX_FIRST_ARG) {
    279    Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
    280    Emit32(c);
    281  } else {
    282    Emit(BC_AND_CHECK_NOT_CHAR, c);
    283  }
    284  Emit32(mask);
    285  EmitOrLink(on_not_equal);
    286 }
    287 
    288 void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd(
    289    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
    290  Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
    291  Emit16(minus);
    292  Emit16(mask);
    293  EmitOrLink(on_not_equal);
    294 }
    295 
    296 void RegExpBytecodeGenerator::CheckCharacterInRange(base::uc16 from,
    297                                                    base::uc16 to,
    298                                                    Label* on_in_range) {
    299  Emit(BC_CHECK_CHAR_IN_RANGE, 0);
    300  Emit16(from);
    301  Emit16(to);
    302  EmitOrLink(on_in_range);
    303 }
    304 
    305 void RegExpBytecodeGenerator::CheckCharacterNotInRange(base::uc16 from,
    306                                                       base::uc16 to,
    307                                                       Label* on_not_in_range) {
    308  Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
    309  Emit16(from);
    310  Emit16(to);
    311  EmitOrLink(on_not_in_range);
    312 }
    313 
    314 void RegExpBytecodeGenerator::EmitSkipTable(DirectHandle<ByteArray> table) {
    315  for (int i = 0; i < kTableSize; i += kBitsPerByte) {
    316    int byte = 0;
    317    for (int j = 0; j < kBitsPerByte; j++) {
    318      if (table->get(i + j) != 0) byte |= 1 << j;
    319    }
    320    Emit8(byte);
    321  }
    322 }
    323 
    324 void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table,
    325                                              Label* on_bit_set) {
    326  Emit(BC_CHECK_BIT_IN_TABLE, 0);
    327  EmitOrLink(on_bit_set);
    328  EmitSkipTable(table);
    329 }
    330 
    331 void RegExpBytecodeGenerator::SkipUntilBitInTable(
    332    int cp_offset, Handle<ByteArray> table, Handle<ByteArray> nibble_table,
    333    int advance_by) {
    334  Label cont;
    335  Emit(BC_SKIP_UNTIL_BIT_IN_TABLE, cp_offset);
    336  Emit32(advance_by);
    337  EmitSkipTable(table);
    338  EmitOrLink(&cont);  // goto_when_match
    339  EmitOrLink(&cont);  // goto_on_failure
    340  Bind(&cont);
    341 }
    342 
    343 void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
    344                                                    bool read_backward,
    345                                                    Label* on_not_equal) {
    346  DCHECK_LE(0, start_reg);
    347  DCHECK_GE(kMaxRegister, start_reg);
    348  Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
    349       start_reg);
    350  EmitOrLink(on_not_equal);
    351 }
    352 
    353 void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
    354    int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
    355  DCHECK_LE(0, start_reg);
    356  DCHECK_GE(kMaxRegister, start_reg);
    357  Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
    358                                : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
    359                     : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
    360                                : BC_CHECK_NOT_BACK_REF_NO_CASE),
    361       start_reg);
    362  EmitOrLink(on_not_equal);
    363 }
    364 
    365 void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand,
    366                                           Label* on_less_than) {
    367  DCHECK_LE(0, register_index);
    368  DCHECK_GE(kMaxRegister, register_index);
    369  Emit(BC_CHECK_REGISTER_LT, register_index);
    370  Emit32(comparand);
    371  EmitOrLink(on_less_than);
    372 }
    373 
    374 void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand,
    375                                           Label* on_greater_or_equal) {
    376  DCHECK_LE(0, register_index);
    377  DCHECK_GE(kMaxRegister, register_index);
    378  Emit(BC_CHECK_REGISTER_GE, register_index);
    379  Emit32(comparand);
    380  EmitOrLink(on_greater_or_equal);
    381 }
    382 
    383 void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
    384                                              Label* on_eq) {
    385  DCHECK_LE(0, register_index);
    386  DCHECK_GE(kMaxRegister, register_index);
    387  Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
    388  EmitOrLink(on_eq);
    389 }
    390 
    391 DirectHandle<HeapObject> RegExpBytecodeGenerator::GetCode(
    392    DirectHandle<String> source, RegExpFlags flags) {
    393  Bind(&backtrack_);
    394  Backtrack();
    395 
    396  DirectHandle<TrustedByteArray> array;
    397  if (v8_flags.regexp_peephole_optimization) {
    398    array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
    399        isolate_, zone(), source, buffer_.data(), length(), jump_edges_);
    400  } else {
    401    array = isolate_->factory()->NewTrustedByteArray(length());
    402    Copy(array->begin());
    403  }
    404 
    405  return array;
    406 }
    407 
    408 int RegExpBytecodeGenerator::length() { return pc_; }
    409 
    410 void RegExpBytecodeGenerator::Copy(uint8_t* a) {
    411  MemCopy(a, buffer_.data(), length());
    412 }
    413 
    414 void RegExpBytecodeGenerator::ExpandBuffer() {
    415  // TODO(jgruber): The growth strategy could be smarter for large sizes.
    416  // TODO(jgruber): It's not necessary to default-initialize new elements.
    417  buffer_.resize(buffer_.size() * 2);
    418 }
    419 
    420 }  // namespace internal
    421 }  // namespace v8