repattrn.cpp (25500B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // 4 // file: repattrn.cpp 5 // 6 /* 7 *************************************************************************** 8 * Copyright (C) 2002-2016 International Business Machines Corporation 9 * and others. All rights reserved. 10 *************************************************************************** 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 16 17 #include "unicode/regex.h" 18 #include "unicode/uclean.h" 19 #include "cmemory.h" 20 #include "cstr.h" 21 #include "uassert.h" 22 #include "uhash.h" 23 #include "uvector.h" 24 #include "uvectr32.h" 25 #include "uvectr64.h" 26 #include "regexcmp.h" 27 #include "regeximp.h" 28 #include "regexst.h" 29 30 U_NAMESPACE_BEGIN 31 32 //-------------------------------------------------------------------------- 33 // 34 // RegexPattern Default Constructor 35 // 36 //-------------------------------------------------------------------------- 37 RegexPattern::RegexPattern() { 38 // Init all of this instances data. 39 init(); 40 } 41 42 43 //-------------------------------------------------------------------------- 44 // 45 // Copy Constructor Note: This is a rather inefficient implementation, 46 // but it probably doesn't matter. 47 // 48 //-------------------------------------------------------------------------- 49 RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 50 init(); 51 *this = other; 52 } 53 54 55 56 //-------------------------------------------------------------------------- 57 // 58 // Assignment Operator 59 // 60 //-------------------------------------------------------------------------- 61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) { 62 if (this == &other) { 63 // Source and destination are the same. Don't do anything. 64 return *this; 65 } 66 67 // Clean out any previous contents of object being assigned to. 68 zap(); 69 70 // Give target object a default initialization 71 init(); 72 73 // Copy simple fields 74 fDeferredStatus = other.fDeferredStatus; 75 76 if (U_FAILURE(fDeferredStatus)) { 77 return *this; 78 } 79 80 if (other.fPatternString == nullptr) { 81 fPatternString = nullptr; 82 fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus); 83 } else { 84 fPatternString = new UnicodeString(*(other.fPatternString)); 85 if (fPatternString == nullptr) { 86 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 87 } else { 88 fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus); 89 } 90 } 91 if (U_FAILURE(fDeferredStatus)) { 92 return *this; 93 } 94 95 fFlags = other.fFlags; 96 fLiteralText = other.fLiteralText; 97 fMinMatchLen = other.fMinMatchLen; 98 fFrameSize = other.fFrameSize; 99 fDataSize = other.fDataSize; 100 101 fStartType = other.fStartType; 102 fInitialStringIdx = other.fInitialStringIdx; 103 fInitialStringLen = other.fInitialStringLen; 104 *fInitialChars = *other.fInitialChars; 105 fInitialChar = other.fInitialChar; 106 *fInitialChars8 = *other.fInitialChars8; 107 fNeedsAltInput = other.fNeedsAltInput; 108 109 // Copy the pattern. It's just values, nothing deep to copy. 110 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 111 fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 112 113 // Copy the Unicode Sets. 114 // Could be made more efficient if the sets were reference counted and shared, 115 // but I doubt that pattern copying will be particularly common. 116 // Note: init() already added an empty element zero to fSets 117 int32_t i; 118 int32_t numSets = other.fSets->size(); 119 fSets8 = new Regex8BitSet[numSets]; 120 if (fSets8 == nullptr) { 121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 122 return *this; 123 } 124 for (i=1; i<numSets; i++) { 125 if (U_FAILURE(fDeferredStatus)) { 126 return *this; 127 } 128 UnicodeSet* sourceSet = static_cast<UnicodeSet*>(other.fSets->elementAt(i)); 129 UnicodeSet *newSet = new UnicodeSet(*sourceSet); 130 if (newSet == nullptr) { 131 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 132 break; 133 } 134 fSets->addElement(newSet, fDeferredStatus); 135 fSets8[i] = other.fSets8[i]; 136 } 137 138 // Copy the named capture group hash map. 139 if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) { 140 int32_t hashPos = UHASH_FIRST; 141 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) { 142 if (U_FAILURE(fDeferredStatus)) { 143 break; 144 } 145 const UnicodeString* name = static_cast<const UnicodeString*>(hashEl->key.pointer); 146 UnicodeString *key = new UnicodeString(*name); 147 int32_t val = hashEl->value.integer; 148 if (key == nullptr) { 149 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 150 } else { 151 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); 152 } 153 } 154 } 155 return *this; 156 } 157 158 159 //-------------------------------------------------------------------------- 160 // 161 // init Shared initialization for use by constructors. 162 // Bring an uninitialized RegexPattern up to a default state. 163 // 164 //-------------------------------------------------------------------------- 165 void RegexPattern::init() { 166 fFlags = 0; 167 fCompiledPat = nullptr; 168 fLiteralText.remove(); 169 fSets = nullptr; 170 fSets8 = nullptr; 171 fDeferredStatus = U_ZERO_ERROR; 172 fMinMatchLen = 0; 173 fFrameSize = 0; 174 fDataSize = 0; 175 fGroupMap = nullptr; 176 fStartType = START_NO_INFO; 177 fInitialStringIdx = 0; 178 fInitialStringLen = 0; 179 fInitialChars = nullptr; 180 fInitialChar = 0; 181 fInitialChars8 = nullptr; 182 fNeedsAltInput = false; 183 fNamedCaptureMap = nullptr; 184 185 fPattern = nullptr; // will be set later 186 fPatternString = nullptr; // may be set later 187 fCompiledPat = new UVector64(fDeferredStatus); 188 fGroupMap = new UVector32(fDeferredStatus); 189 fSets = new UVector(fDeferredStatus); 190 fInitialChars = new UnicodeSet; 191 fInitialChars8 = new Regex8BitSet; 192 if (U_FAILURE(fDeferredStatus)) { 193 return; 194 } 195 if (fCompiledPat == nullptr || fGroupMap == nullptr || fSets == nullptr || 196 fInitialChars == nullptr || fInitialChars8 == nullptr) { 197 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 198 return; 199 } 200 201 // Slot zero of the vector of sets is reserved. Fill it here. 202 fSets->addElement(static_cast<int32_t>(0), fDeferredStatus); 203 } 204 205 206 bool RegexPattern::initNamedCaptureMap() { 207 if (fNamedCaptureMap) { 208 return true; 209 } 210 fNamedCaptureMap = uhash_openSize(uhash_hashUnicodeString, // Key hash function 211 uhash_compareUnicodeString, // Key comparator function 212 uhash_compareLong, // Value comparator function 213 7, // Initial table capacity 214 &fDeferredStatus); 215 if (U_FAILURE(fDeferredStatus)) { 216 return false; 217 } 218 219 // fNamedCaptureMap owns its key strings, type (UnicodeString *) 220 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); 221 return true; 222 } 223 224 //-------------------------------------------------------------------------- 225 // 226 // zap Delete everything owned by this RegexPattern. 227 // 228 //-------------------------------------------------------------------------- 229 void RegexPattern::zap() { 230 delete fCompiledPat; 231 fCompiledPat = nullptr; 232 int i; 233 for (i=1; i<fSets->size(); i++) { 234 UnicodeSet *s; 235 s = static_cast<UnicodeSet*>(fSets->elementAt(i)); 236 delete s; 237 } 238 delete fSets; 239 fSets = nullptr; 240 delete[] fSets8; 241 fSets8 = nullptr; 242 delete fGroupMap; 243 fGroupMap = nullptr; 244 delete fInitialChars; 245 fInitialChars = nullptr; 246 delete fInitialChars8; 247 fInitialChars8 = nullptr; 248 if (fPattern != nullptr) { 249 utext_close(fPattern); 250 fPattern = nullptr; 251 } 252 if (fPatternString != nullptr) { 253 delete fPatternString; 254 fPatternString = nullptr; 255 } 256 if (fNamedCaptureMap != nullptr) { 257 uhash_close(fNamedCaptureMap); 258 fNamedCaptureMap = nullptr; 259 } 260 } 261 262 263 //-------------------------------------------------------------------------- 264 // 265 // Destructor 266 // 267 //-------------------------------------------------------------------------- 268 RegexPattern::~RegexPattern() { 269 zap(); 270 } 271 272 273 //-------------------------------------------------------------------------- 274 // 275 // Clone 276 // 277 //-------------------------------------------------------------------------- 278 RegexPattern *RegexPattern::clone() const { 279 RegexPattern *copy = new RegexPattern(*this); 280 return copy; 281 } 282 283 284 //-------------------------------------------------------------------------- 285 // 286 // operator == (comparison) Consider to patterns to be == if the 287 // pattern strings and the flags are the same. 288 // Note that pattern strings with the same 289 // characters can still be considered different. 290 // 291 //-------------------------------------------------------------------------- 292 bool RegexPattern::operator ==(const RegexPattern &other) const { 293 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 294 if (this->fPatternString != nullptr && other.fPatternString != nullptr) { 295 return *(this->fPatternString) == *(other.fPatternString); 296 } else if (this->fPattern == nullptr) { 297 if (other.fPattern == nullptr) { 298 return true; 299 } 300 } else if (other.fPattern != nullptr) { 301 UTEXT_SETNATIVEINDEX(this->fPattern, 0); 302 UTEXT_SETNATIVEINDEX(other.fPattern, 0); 303 return utext_equals(this->fPattern, other.fPattern); 304 } 305 } 306 return false; 307 } 308 309 //--------------------------------------------------------------------- 310 // 311 // compile 312 // 313 //--------------------------------------------------------------------- 314 RegexPattern * U_EXPORT2 315 RegexPattern::compile(const UnicodeString ®ex, 316 uint32_t flags, 317 UParseError &pe, 318 UErrorCode &status) 319 { 320 if (U_FAILURE(status)) { 321 return nullptr; 322 } 323 324 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 325 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 326 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 327 328 if ((flags & ~allFlags) != 0) { 329 status = U_REGEX_INVALID_FLAG; 330 return nullptr; 331 } 332 333 if ((flags & UREGEX_CANON_EQ) != 0) { 334 status = U_REGEX_UNIMPLEMENTED; 335 return nullptr; 336 } 337 338 RegexPattern *This = new RegexPattern; 339 if (This == nullptr) { 340 status = U_MEMORY_ALLOCATION_ERROR; 341 return nullptr; 342 } 343 if (U_FAILURE(This->fDeferredStatus)) { 344 status = This->fDeferredStatus; 345 delete This; 346 return nullptr; 347 } 348 This->fFlags = flags; 349 350 RegexCompile compiler(This, status); 351 compiler.compile(regex, pe, status); 352 353 if (U_FAILURE(status)) { 354 delete This; 355 This = nullptr; 356 } 357 358 return This; 359 } 360 361 362 // 363 // compile, UText mode 364 // 365 RegexPattern * U_EXPORT2 366 RegexPattern::compile(UText *regex, 367 uint32_t flags, 368 UParseError &pe, 369 UErrorCode &status) 370 { 371 if (U_FAILURE(status)) { 372 return nullptr; 373 } 374 375 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 376 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 377 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 378 379 if ((flags & ~allFlags) != 0) { 380 status = U_REGEX_INVALID_FLAG; 381 return nullptr; 382 } 383 384 if ((flags & UREGEX_CANON_EQ) != 0) { 385 status = U_REGEX_UNIMPLEMENTED; 386 return nullptr; 387 } 388 389 RegexPattern *This = new RegexPattern; 390 if (This == nullptr) { 391 status = U_MEMORY_ALLOCATION_ERROR; 392 return nullptr; 393 } 394 if (U_FAILURE(This->fDeferredStatus)) { 395 status = This->fDeferredStatus; 396 delete This; 397 return nullptr; 398 } 399 This->fFlags = flags; 400 401 RegexCompile compiler(This, status); 402 compiler.compile(regex, pe, status); 403 404 if (U_FAILURE(status)) { 405 delete This; 406 This = nullptr; 407 } 408 409 return This; 410 } 411 412 // 413 // compile with default flags. 414 // 415 RegexPattern * U_EXPORT2 416 RegexPattern::compile(const UnicodeString ®ex, 417 UParseError &pe, 418 UErrorCode &err) 419 { 420 return compile(regex, 0, pe, err); 421 } 422 423 424 // 425 // compile with default flags, UText mode 426 // 427 RegexPattern * U_EXPORT2 428 RegexPattern::compile(UText *regex, 429 UParseError &pe, 430 UErrorCode &err) 431 { 432 return compile(regex, 0, pe, err); 433 } 434 435 436 // 437 // compile with no UParseErr parameter. 438 // 439 RegexPattern * U_EXPORT2 440 RegexPattern::compile(const UnicodeString ®ex, 441 uint32_t flags, 442 UErrorCode &err) 443 { 444 UParseError pe; 445 return compile(regex, flags, pe, err); 446 } 447 448 449 // 450 // compile with no UParseErr parameter, UText mode 451 // 452 RegexPattern * U_EXPORT2 453 RegexPattern::compile(UText *regex, 454 uint32_t flags, 455 UErrorCode &err) 456 { 457 UParseError pe; 458 return compile(regex, flags, pe, err); 459 } 460 461 462 //--------------------------------------------------------------------- 463 // 464 // flags 465 // 466 //--------------------------------------------------------------------- 467 uint32_t RegexPattern::flags() const { 468 return fFlags; 469 } 470 471 472 //--------------------------------------------------------------------- 473 // 474 // matcher(UnicodeString, err) 475 // 476 //--------------------------------------------------------------------- 477 RegexMatcher *RegexPattern::matcher(const UnicodeString &input, 478 UErrorCode &status) const { 479 RegexMatcher *retMatcher = matcher(status); 480 if (retMatcher != nullptr) { 481 retMatcher->fDeferredStatus = status; 482 retMatcher->reset(input); 483 } 484 return retMatcher; 485 } 486 487 488 //--------------------------------------------------------------------- 489 // 490 // matcher(status) 491 // 492 //--------------------------------------------------------------------- 493 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 494 RegexMatcher *retMatcher = nullptr; 495 496 if (U_FAILURE(status)) { 497 return nullptr; 498 } 499 if (U_FAILURE(fDeferredStatus)) { 500 status = fDeferredStatus; 501 return nullptr; 502 } 503 504 retMatcher = new RegexMatcher(this); 505 if (retMatcher == nullptr) { 506 status = U_MEMORY_ALLOCATION_ERROR; 507 return nullptr; 508 } 509 return retMatcher; 510 } 511 512 513 514 //--------------------------------------------------------------------- 515 // 516 // matches Convenience function to test for a match, starting 517 // with a pattern string and a data string. 518 // 519 //--------------------------------------------------------------------- 520 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 521 const UnicodeString &input, 522 UParseError &pe, 523 UErrorCode &status) { 524 525 if (U_FAILURE(status)) {return false;} 526 527 UBool retVal; 528 RegexPattern *pat = nullptr; 529 RegexMatcher *matcher = nullptr; 530 531 pat = RegexPattern::compile(regex, 0, pe, status); 532 matcher = pat->matcher(input, status); 533 retVal = matcher->matches(status); 534 535 delete matcher; 536 delete pat; 537 return retVal; 538 } 539 540 541 // 542 // matches, UText mode 543 // 544 UBool U_EXPORT2 RegexPattern::matches(UText *regex, 545 UText *input, 546 UParseError &pe, 547 UErrorCode &status) { 548 549 if (U_FAILURE(status)) {return false;} 550 551 UBool retVal = false; 552 RegexPattern *pat = nullptr; 553 RegexMatcher *matcher = nullptr; 554 555 pat = RegexPattern::compile(regex, 0, pe, status); 556 matcher = pat->matcher(status); 557 if (U_SUCCESS(status)) { 558 matcher->reset(input); 559 retVal = matcher->matches(status); 560 } 561 562 delete matcher; 563 delete pat; 564 return retVal; 565 } 566 567 568 569 570 571 //--------------------------------------------------------------------- 572 // 573 // pattern 574 // 575 //--------------------------------------------------------------------- 576 UnicodeString RegexPattern::pattern() const { 577 if (fPatternString != nullptr) { 578 return *fPatternString; 579 } else if (fPattern == nullptr) { 580 return {}; 581 } else { 582 UErrorCode status = U_ZERO_ERROR; 583 int64_t nativeLen = utext_nativeLength(fPattern); 584 int32_t len16 = utext_extract(fPattern, 0, nativeLen, nullptr, 0, &status); // buffer overflow error 585 UnicodeString result; 586 587 status = U_ZERO_ERROR; 588 char16_t *resultChars = result.getBuffer(len16); 589 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 590 result.releaseBuffer(len16); 591 592 return result; 593 } 594 } 595 596 597 598 599 //--------------------------------------------------------------------- 600 // 601 // patternText 602 // 603 //--------------------------------------------------------------------- 604 UText *RegexPattern::patternText(UErrorCode &status) const { 605 if (U_FAILURE(status)) {return nullptr;} 606 status = U_ZERO_ERROR; 607 608 if (fPattern != nullptr) { 609 return fPattern; 610 } else { 611 RegexStaticSets::initGlobals(&status); 612 return RegexStaticSets::gStaticSets->fEmptyText; 613 } 614 } 615 616 617 //-------------------------------------------------------------------------------- 618 // 619 // groupNumberFromName() 620 // 621 //-------------------------------------------------------------------------------- 622 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const { 623 if (U_FAILURE(status)) { 624 return 0; 625 } 626 627 // No need to explicitly check for syntactically valid names. 628 // Invalid ones will never be in the map, and the lookup will fail. 629 630 int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0; 631 if (number == 0) { 632 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 633 } 634 return number; 635 } 636 637 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const { 638 if (U_FAILURE(status)) { 639 return 0; 640 } 641 UnicodeString name(groupName, nameLength, US_INV); 642 return groupNumberFromName(name, status); 643 } 644 645 646 //--------------------------------------------------------------------- 647 // 648 // split 649 // 650 //--------------------------------------------------------------------- 651 int32_t RegexPattern::split(const UnicodeString &input, 652 UnicodeString dest[], 653 int32_t destCapacity, 654 UErrorCode &status) const 655 { 656 if (U_FAILURE(status)) { 657 return 0; 658 } 659 660 RegexMatcher m(this); 661 int32_t r = 0; 662 // Check m's status to make sure all is ok. 663 if (U_SUCCESS(m.fDeferredStatus)) { 664 r = m.split(input, dest, destCapacity, status); 665 } 666 return r; 667 } 668 669 // 670 // split, UText mode 671 // 672 int32_t RegexPattern::split(UText *input, 673 UText *dest[], 674 int32_t destCapacity, 675 UErrorCode &status) const 676 { 677 if (U_FAILURE(status)) { 678 return 0; 679 } 680 681 RegexMatcher m(this); 682 int32_t r = 0; 683 // Check m's status to make sure all is ok. 684 if (U_SUCCESS(m.fDeferredStatus)) { 685 r = m.split(input, dest, destCapacity, status); 686 } 687 return r; 688 } 689 690 691 //--------------------------------------------------------------------- 692 // 693 // dump Output the compiled form of the pattern. 694 // Debugging function only. 695 // 696 //--------------------------------------------------------------------- 697 void RegexPattern::dumpOp(int32_t index) const { 698 (void)index; // Suppress warnings in non-debug build. 699 #if defined(REGEX_DEBUG) 700 static const char * const opNames[] = {URX_OPCODE_NAMES}; 701 int32_t op = fCompiledPat->elementAti(index); 702 int32_t val = URX_VAL(op); 703 int32_t type = URX_TYPE(op); 704 int32_t pinnedType = type; 705 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) { 706 pinnedType = 0; 707 } 708 709 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 710 switch (type) { 711 case URX_NOP: 712 case URX_DOTANY: 713 case URX_DOTANY_ALL: 714 case URX_FAIL: 715 case URX_CARET: 716 case URX_DOLLAR: 717 case URX_BACKSLASH_G: 718 case URX_BACKSLASH_X: 719 case URX_END: 720 case URX_DOLLAR_M: 721 case URX_CARET_M: 722 // Types with no operand field of interest. 723 break; 724 725 case URX_RESERVED_OP: 726 case URX_START_CAPTURE: 727 case URX_END_CAPTURE: 728 case URX_STATE_SAVE: 729 case URX_JMP: 730 case URX_JMP_SAV: 731 case URX_JMP_SAV_X: 732 case URX_BACKSLASH_B: 733 case URX_BACKSLASH_BU: 734 case URX_BACKSLASH_D: 735 case URX_BACKSLASH_Z: 736 case URX_STRING_LEN: 737 case URX_CTR_INIT: 738 case URX_CTR_INIT_NG: 739 case URX_CTR_LOOP: 740 case URX_CTR_LOOP_NG: 741 case URX_RELOC_OPRND: 742 case URX_STO_SP: 743 case URX_LD_SP: 744 case URX_BACKREF: 745 case URX_STO_INP_LOC: 746 case URX_JMPX: 747 case URX_LA_START: 748 case URX_LA_END: 749 case URX_BACKREF_I: 750 case URX_LB_START: 751 case URX_LB_CONT: 752 case URX_LB_END: 753 case URX_LBN_CONT: 754 case URX_LBN_END: 755 case URX_LOOP_C: 756 case URX_LOOP_DOT_I: 757 case URX_BACKSLASH_H: 758 case URX_BACKSLASH_R: 759 case URX_BACKSLASH_V: 760 // types with an integer operand field. 761 printf("%d", val); 762 break; 763 764 case URX_ONECHAR: 765 case URX_ONECHAR_I: 766 if (val < 0x20) { 767 printf("%#x", val); 768 } else { 769 printf("'%s'", CStr(UnicodeString(val))()); 770 } 771 break; 772 773 case URX_STRING: 774 case URX_STRING_I: 775 { 776 int32_t lengthOp = fCompiledPat->elementAti(index+1); 777 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 778 int32_t length = URX_VAL(lengthOp); 779 UnicodeString str(fLiteralText, val, length); 780 printf("%s", CStr(str)()); 781 } 782 break; 783 784 case URX_SETREF: 785 case URX_LOOP_SR_I: 786 { 787 UnicodeString s; 788 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 789 set->toPattern(s, true); 790 printf("%s", CStr(s)()); 791 } 792 break; 793 794 case URX_STATIC_SETREF: 795 case URX_STAT_SETREF_N: 796 { 797 UnicodeString s; 798 if (val & URX_NEG_SET) { 799 printf("NOT "); 800 val &= ~URX_NEG_SET; 801 } 802 UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val]; 803 set.toPattern(s, true); 804 printf("%s", CStr(s)()); 805 } 806 break; 807 808 809 default: 810 printf("??????"); 811 break; 812 } 813 printf("\n"); 814 #endif 815 } 816 817 818 void RegexPattern::dumpPattern() const { 819 #if defined(REGEX_DEBUG) 820 int index; 821 822 UnicodeString patStr; 823 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) { 824 patStr.append(c); 825 } 826 printf("Original Pattern: \"%s\"\n", CStr(patStr)()); 827 printf(" Min Match Length: %d\n", fMinMatchLen); 828 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 829 if (fStartType == START_STRING) { 830 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen); 831 printf(" Initial match string: \"%s\"\n", CStr(initialString)()); 832 } else if (fStartType == START_SET) { 833 UnicodeString s; 834 fInitialChars->toPattern(s, true); 835 printf(" Match First Chars: %s\n", CStr(s)()); 836 837 } else if (fStartType == START_CHAR) { 838 printf(" First char of Match: "); 839 if (fInitialChar > 0x20) { 840 printf("'%s'\n", CStr(UnicodeString(fInitialChar))()); 841 } else { 842 printf("%#x\n", fInitialChar); 843 } 844 } 845 846 printf("Named Capture Groups:\n"); 847 if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) { 848 printf(" None\n"); 849 } else { 850 int32_t pos = UHASH_FIRST; 851 const UHashElement *el = nullptr; 852 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { 853 const UnicodeString *name = (const UnicodeString *)el->key.pointer; 854 int32_t number = el->value.integer; 855 printf(" %d\t%s\n", number, CStr(*name)()); 856 } 857 } 858 859 printf("\nIndex Binary Type Operand\n" \ 860 "-------------------------------------------\n"); 861 for (index = 0; index<fCompiledPat->size(); index++) { 862 dumpOp(index); 863 } 864 printf("\n\n"); 865 #endif 866 } 867 868 869 870 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 871 872 U_NAMESPACE_END 873 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS