regexp-bytecode-generator.cc (13197B)
1 // Copyright 2008-2009 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "irregexp/imported/regexp-bytecode-generator.h" 6 7 #include "irregexp/imported/regexp-bytecode-generator-inl.h" 8 #include "irregexp/imported/regexp-bytecode-peephole.h" 9 #include "irregexp/imported/regexp-bytecodes.h" 10 #include "irregexp/imported/regexp-macro-assembler.h" 11 12 namespace v8 { 13 namespace internal { 14 15 RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone) 16 : RegExpMacroAssembler(isolate, zone), 17 buffer_(kInitialBufferSize, zone), 18 pc_(0), 19 advance_current_end_(kInvalidPC), 20 jump_edges_(zone), 21 isolate_(isolate) {} 22 23 RegExpBytecodeGenerator::~RegExpBytecodeGenerator() { 24 if (backtrack_.is_linked()) backtrack_.Unuse(); 25 } 26 27 RegExpBytecodeGenerator::IrregexpImplementation 28 RegExpBytecodeGenerator::Implementation() { 29 return kBytecodeImplementation; 30 } 31 32 void RegExpBytecodeGenerator::Bind(Label* l) { 33 advance_current_end_ = kInvalidPC; 34 DCHECK(!l->is_bound()); 35 if (l->is_linked()) { 36 int pos = l->pos(); 37 while (pos != 0) { 38 int fixup = pos; 39 pos = *reinterpret_cast<int32_t*>(buffer_.data() + fixup); 40 *reinterpret_cast<uint32_t*>(buffer_.data() + fixup) = pc_; 41 jump_edges_.emplace(fixup, pc_); 42 } 43 } 44 l->bind_to(pc_); 45 } 46 47 void RegExpBytecodeGenerator::EmitOrLink(Label* l) { 48 if (l == nullptr) l = &backtrack_; 49 int pos = 0; 50 if (l->is_bound()) { 51 pos = l->pos(); 52 jump_edges_.emplace(pc_, pos); 53 } else { 54 if (l->is_linked()) { 55 pos = l->pos(); 56 } 57 l->link_to(pc_); 58 } 59 Emit32(pos); 60 } 61 62 void RegExpBytecodeGenerator::PopRegister(int register_index) { 63 DCHECK_LE(0, register_index); 64 DCHECK_GE(kMaxRegister, register_index); 65 Emit(BC_POP_REGISTER, register_index); 66 } 67 68 void RegExpBytecodeGenerator::PushRegister(int register_index, 69 StackCheckFlag check_stack_limit) { 70 DCHECK_LE(0, register_index); 71 DCHECK_GE(kMaxRegister, register_index); 72 Emit(BC_PUSH_REGISTER, register_index); 73 } 74 75 void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index, 76 int cp_offset) { 77 DCHECK_LE(0, register_index); 78 DCHECK_GE(kMaxRegister, register_index); 79 Emit(BC_SET_REGISTER_TO_CP, register_index); 80 Emit32(cp_offset); // Current position offset. 81 } 82 83 void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) { 84 DCHECK(reg_from <= reg_to); 85 for (int reg = reg_from; reg <= reg_to; reg++) { 86 SetRegister(reg, -1); 87 } 88 } 89 90 void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister( 91 int register_index) { 92 DCHECK_LE(0, register_index); 93 DCHECK_GE(kMaxRegister, register_index); 94 Emit(BC_SET_CP_TO_REGISTER, register_index); 95 } 96 97 void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) { 98 DCHECK_LE(0, register_index); 99 DCHECK_GE(kMaxRegister, register_index); 100 Emit(BC_SET_REGISTER_TO_SP, register_index); 101 } 102 103 void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) { 104 DCHECK_LE(0, register_index); 105 DCHECK_GE(kMaxRegister, register_index); 106 Emit(BC_SET_SP_TO_REGISTER, register_index); 107 } 108 109 void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) { 110 DCHECK(is_uint24(by)); 111 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 112 } 113 114 void RegExpBytecodeGenerator::SetRegister(int register_index, int to) { 115 DCHECK_LE(0, register_index); 116 DCHECK_GE(kMaxRegister, register_index); 117 Emit(BC_SET_REGISTER, register_index); 118 Emit32(to); 119 } 120 121 void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) { 122 DCHECK_LE(0, register_index); 123 DCHECK_GE(kMaxRegister, register_index); 124 Emit(BC_ADVANCE_REGISTER, register_index); 125 Emit32(by); 126 } 127 128 void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); } 129 130 void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); } 131 132 void RegExpBytecodeGenerator::Backtrack() { 133 int error_code = 134 can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE; 135 Emit(BC_POP_BT, error_code); 136 } 137 138 void RegExpBytecodeGenerator::GoTo(Label* l) { 139 if (advance_current_end_ == pc_) { 140 // Combine advance current and goto. 141 pc_ = advance_current_start_; 142 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 143 EmitOrLink(l); 144 advance_current_end_ = kInvalidPC; 145 } else { 146 // Regular goto. 147 Emit(BC_GOTO, 0); 148 EmitOrLink(l); 149 } 150 } 151 152 void RegExpBytecodeGenerator::PushBacktrack(Label* l) { 153 Emit(BC_PUSH_BT, 0); 154 EmitOrLink(l); 155 } 156 157 bool RegExpBytecodeGenerator::Succeed() { 158 Emit(BC_SUCCEED, 0); 159 return false; // Restart matching for global regexp not supported. 160 } 161 162 void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); } 163 164 void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) { 165 // TODO(chromium:1166138): Turn back into DCHECKs once the underlying issue 166 // is fixed. 167 CHECK_LE(kMinCPOffset, by); 168 CHECK_GE(kMaxCPOffset, by); 169 advance_current_start_ = pc_; 170 advance_current_offset_ = by; 171 Emit(BC_ADVANCE_CP, by); 172 advance_current_end_ = pc_; 173 } 174 175 void RegExpBytecodeGenerator::CheckFixedLengthLoop( 176 Label* on_tos_equals_current_position) { 177 Emit(BC_CHECK_FIXED_LENGTH, 0); 178 EmitOrLink(on_tos_equals_current_position); 179 } 180 181 void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset, 182 Label* on_failure, 183 bool check_bounds, 184 int characters, 185 int eats_at_least) { 186 DCHECK_GE(eats_at_least, characters); 187 if (eats_at_least > characters && check_bounds) { 188 DCHECK(is_int24(cp_offset + eats_at_least)); 189 Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least); 190 EmitOrLink(on_failure); 191 check_bounds = false; // Load below doesn't need to check. 192 } 193 194 CHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset)); 195 int bytecode; 196 if (check_bounds) { 197 if (characters == 4) { 198 bytecode = BC_LOAD_4_CURRENT_CHARS; 199 } else if (characters == 2) { 200 bytecode = BC_LOAD_2_CURRENT_CHARS; 201 } else { 202 DCHECK_EQ(1, characters); 203 bytecode = BC_LOAD_CURRENT_CHAR; 204 } 205 } else { 206 if (characters == 4) { 207 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 208 } else if (characters == 2) { 209 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 210 } else { 211 DCHECK_EQ(1, characters); 212 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 213 } 214 } 215 Emit(bytecode, cp_offset); 216 if (check_bounds) EmitOrLink(on_failure); 217 } 218 219 void RegExpBytecodeGenerator::CheckCharacterLT(base::uc16 limit, 220 Label* on_less) { 221 Emit(BC_CHECK_LT, limit); 222 EmitOrLink(on_less); 223 } 224 225 void RegExpBytecodeGenerator::CheckCharacterGT(base::uc16 limit, 226 Label* on_greater) { 227 Emit(BC_CHECK_GT, limit); 228 EmitOrLink(on_greater); 229 } 230 231 void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) { 232 if (c > MAX_FIRST_ARG) { 233 Emit(BC_CHECK_4_CHARS, 0); 234 Emit32(c); 235 } else { 236 Emit(BC_CHECK_CHAR, c); 237 } 238 EmitOrLink(on_equal); 239 } 240 241 void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) { 242 Emit(BC_CHECK_AT_START, cp_offset); 243 EmitOrLink(on_at_start); 244 } 245 246 void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset, 247 Label* on_not_at_start) { 248 Emit(BC_CHECK_NOT_AT_START, cp_offset); 249 EmitOrLink(on_not_at_start); 250 } 251 252 void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c, 253 Label* on_not_equal) { 254 if (c > MAX_FIRST_ARG) { 255 Emit(BC_CHECK_NOT_4_CHARS, 0); 256 Emit32(c); 257 } else { 258 Emit(BC_CHECK_NOT_CHAR, c); 259 } 260 EmitOrLink(on_not_equal); 261 } 262 263 void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask, 264 Label* on_equal) { 265 if (c > MAX_FIRST_ARG) { 266 Emit(BC_AND_CHECK_4_CHARS, 0); 267 Emit32(c); 268 } else { 269 Emit(BC_AND_CHECK_CHAR, c); 270 } 271 Emit32(mask); 272 EmitOrLink(on_equal); 273 } 274 275 void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c, 276 uint32_t mask, 277 Label* on_not_equal) { 278 if (c > MAX_FIRST_ARG) { 279 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 280 Emit32(c); 281 } else { 282 Emit(BC_AND_CHECK_NOT_CHAR, c); 283 } 284 Emit32(mask); 285 EmitOrLink(on_not_equal); 286 } 287 288 void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd( 289 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) { 290 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 291 Emit16(minus); 292 Emit16(mask); 293 EmitOrLink(on_not_equal); 294 } 295 296 void RegExpBytecodeGenerator::CheckCharacterInRange(base::uc16 from, 297 base::uc16 to, 298 Label* on_in_range) { 299 Emit(BC_CHECK_CHAR_IN_RANGE, 0); 300 Emit16(from); 301 Emit16(to); 302 EmitOrLink(on_in_range); 303 } 304 305 void RegExpBytecodeGenerator::CheckCharacterNotInRange(base::uc16 from, 306 base::uc16 to, 307 Label* on_not_in_range) { 308 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); 309 Emit16(from); 310 Emit16(to); 311 EmitOrLink(on_not_in_range); 312 } 313 314 void RegExpBytecodeGenerator::EmitSkipTable(DirectHandle<ByteArray> table) { 315 for (int i = 0; i < kTableSize; i += kBitsPerByte) { 316 int byte = 0; 317 for (int j = 0; j < kBitsPerByte; j++) { 318 if (table->get(i + j) != 0) byte |= 1 << j; 319 } 320 Emit8(byte); 321 } 322 } 323 324 void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table, 325 Label* on_bit_set) { 326 Emit(BC_CHECK_BIT_IN_TABLE, 0); 327 EmitOrLink(on_bit_set); 328 EmitSkipTable(table); 329 } 330 331 void RegExpBytecodeGenerator::SkipUntilBitInTable( 332 int cp_offset, Handle<ByteArray> table, Handle<ByteArray> nibble_table, 333 int advance_by) { 334 Label cont; 335 Emit(BC_SKIP_UNTIL_BIT_IN_TABLE, cp_offset); 336 Emit32(advance_by); 337 EmitSkipTable(table); 338 EmitOrLink(&cont); // goto_when_match 339 EmitOrLink(&cont); // goto_on_failure 340 Bind(&cont); 341 } 342 343 void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg, 344 bool read_backward, 345 Label* on_not_equal) { 346 DCHECK_LE(0, start_reg); 347 DCHECK_GE(kMaxRegister, start_reg); 348 Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, 349 start_reg); 350 EmitOrLink(on_not_equal); 351 } 352 353 void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase( 354 int start_reg, bool read_backward, bool unicode, Label* on_not_equal) { 355 DCHECK_LE(0, start_reg); 356 DCHECK_GE(kMaxRegister, start_reg); 357 Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD 358 : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) 359 : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE 360 : BC_CHECK_NOT_BACK_REF_NO_CASE), 361 start_reg); 362 EmitOrLink(on_not_equal); 363 } 364 365 void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand, 366 Label* on_less_than) { 367 DCHECK_LE(0, register_index); 368 DCHECK_GE(kMaxRegister, register_index); 369 Emit(BC_CHECK_REGISTER_LT, register_index); 370 Emit32(comparand); 371 EmitOrLink(on_less_than); 372 } 373 374 void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand, 375 Label* on_greater_or_equal) { 376 DCHECK_LE(0, register_index); 377 DCHECK_GE(kMaxRegister, register_index); 378 Emit(BC_CHECK_REGISTER_GE, register_index); 379 Emit32(comparand); 380 EmitOrLink(on_greater_or_equal); 381 } 382 383 void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index, 384 Label* on_eq) { 385 DCHECK_LE(0, register_index); 386 DCHECK_GE(kMaxRegister, register_index); 387 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 388 EmitOrLink(on_eq); 389 } 390 391 DirectHandle<HeapObject> RegExpBytecodeGenerator::GetCode( 392 DirectHandle<String> source, RegExpFlags flags) { 393 Bind(&backtrack_); 394 Backtrack(); 395 396 DirectHandle<TrustedByteArray> array; 397 if (v8_flags.regexp_peephole_optimization) { 398 array = RegExpBytecodePeepholeOptimization::OptimizeBytecode( 399 isolate_, zone(), source, buffer_.data(), length(), jump_edges_); 400 } else { 401 array = isolate_->factory()->NewTrustedByteArray(length()); 402 Copy(array->begin()); 403 } 404 405 return array; 406 } 407 408 int RegExpBytecodeGenerator::length() { return pc_; } 409 410 void RegExpBytecodeGenerator::Copy(uint8_t* a) { 411 MemCopy(a, buffer_.data(), length()); 412 } 413 414 void RegExpBytecodeGenerator::ExpandBuffer() { 415 // TODO(jgruber): The growth strategy could be smarter for large sizes. 416 // TODO(jgruber): It's not necessary to default-initialize new elements. 417 buffer_.resize(buffer_.size() * 2); 418 } 419 420 } // namespace internal 421 } // namespace v8