tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

repattrn.cpp (25500B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  file:  repattrn.cpp
      5 //
      6 /*
      7 ***************************************************************************
      8 *   Copyright (C) 2002-2016 International Business Machines Corporation
      9 *   and others. All rights reserved.
     10 ***************************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     16 
     17 #include "unicode/regex.h"
     18 #include "unicode/uclean.h"
     19 #include "cmemory.h"
     20 #include "cstr.h"
     21 #include "uassert.h"
     22 #include "uhash.h"
     23 #include "uvector.h"
     24 #include "uvectr32.h"
     25 #include "uvectr64.h"
     26 #include "regexcmp.h"
     27 #include "regeximp.h"
     28 #include "regexst.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 //--------------------------------------------------------------------------
     33 //
     34 //    RegexPattern    Default Constructor
     35 //
     36 //--------------------------------------------------------------------------
     37 RegexPattern::RegexPattern() {
     38    // Init all of this instances data.
     39    init();
     40 }
     41 
     42 
     43 //--------------------------------------------------------------------------
     44 //
     45 //   Copy Constructor        Note:  This is a rather inefficient implementation,
     46 //                                  but it probably doesn't matter.
     47 //
     48 //--------------------------------------------------------------------------
     49 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
     50    init();
     51    *this = other;
     52 }
     53 
     54 
     55 
     56 //--------------------------------------------------------------------------
     57 //
     58 //    Assignment Operator
     59 //
     60 //--------------------------------------------------------------------------
     61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     62    if (this == &other) {
     63        // Source and destination are the same.  Don't do anything.
     64        return *this;
     65    }
     66 
     67    // Clean out any previous contents of object being assigned to.
     68    zap();
     69 
     70    // Give target object a default initialization
     71    init();
     72 
     73    // Copy simple fields
     74    fDeferredStatus   = other.fDeferredStatus;
     75 
     76    if (U_FAILURE(fDeferredStatus)) {
     77        return *this;
     78    }
     79 
     80    if (other.fPatternString == nullptr) {
     81        fPatternString = nullptr;
     82        fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus);
     83    } else {
     84        fPatternString = new UnicodeString(*(other.fPatternString));
     85        if (fPatternString == nullptr) {
     86            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     87        } else {
     88            fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus);
     89        }
     90    }
     91    if (U_FAILURE(fDeferredStatus)) {
     92        return *this;
     93    }
     94 
     95    fFlags            = other.fFlags;
     96    fLiteralText      = other.fLiteralText;
     97    fMinMatchLen      = other.fMinMatchLen;
     98    fFrameSize        = other.fFrameSize;
     99    fDataSize         = other.fDataSize;
    100 
    101    fStartType        = other.fStartType;
    102    fInitialStringIdx = other.fInitialStringIdx;
    103    fInitialStringLen = other.fInitialStringLen;
    104    *fInitialChars    = *other.fInitialChars;
    105    fInitialChar      = other.fInitialChar;
    106    *fInitialChars8   = *other.fInitialChars8;
    107    fNeedsAltInput    = other.fNeedsAltInput;
    108 
    109    //  Copy the pattern.  It's just values, nothing deep to copy.
    110    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    111    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    112 
    113    //  Copy the Unicode Sets.
    114    //    Could be made more efficient if the sets were reference counted and shared,
    115    //    but I doubt that pattern copying will be particularly common.
    116    //    Note:  init() already added an empty element zero to fSets
    117    int32_t i;
    118    int32_t  numSets = other.fSets->size();
    119    fSets8 = new Regex8BitSet[numSets];
    120    if (fSets8 == nullptr) {
    121    	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    122    	return *this;
    123    }
    124    for (i=1; i<numSets; i++) {
    125        if (U_FAILURE(fDeferredStatus)) {
    126            return *this;
    127        }
    128        UnicodeSet* sourceSet = static_cast<UnicodeSet*>(other.fSets->elementAt(i));
    129        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
    130        if (newSet == nullptr) {
    131            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    132            break;
    133        }
    134        fSets->addElement(newSet, fDeferredStatus);
    135        fSets8[i] = other.fSets8[i];
    136    }
    137 
    138    // Copy the named capture group hash map.
    139    if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
    140        int32_t hashPos = UHASH_FIRST;
    141        while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
    142            if (U_FAILURE(fDeferredStatus)) {
    143                break;
    144            }
    145            const UnicodeString* name = static_cast<const UnicodeString*>(hashEl->key.pointer);
    146            UnicodeString *key = new UnicodeString(*name);
    147            int32_t val = hashEl->value.integer;
    148            if (key == nullptr) {
    149                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    150            } else {
    151                uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
    152            }
    153        }
    154    }
    155    return *this;
    156 }
    157 
    158 
    159 //--------------------------------------------------------------------------
    160 //
    161 //    init        Shared initialization for use by constructors.
    162 //                Bring an uninitialized RegexPattern up to a default state.
    163 //
    164 //--------------------------------------------------------------------------
    165 void RegexPattern::init() {
    166    fFlags            = 0;
    167    fCompiledPat      = nullptr;
    168    fLiteralText.remove();
    169    fSets             = nullptr;
    170    fSets8            = nullptr;
    171    fDeferredStatus   = U_ZERO_ERROR;
    172    fMinMatchLen      = 0;
    173    fFrameSize        = 0;
    174    fDataSize         = 0;
    175    fGroupMap         = nullptr;
    176    fStartType        = START_NO_INFO;
    177    fInitialStringIdx = 0;
    178    fInitialStringLen = 0;
    179    fInitialChars     = nullptr;
    180    fInitialChar      = 0;
    181    fInitialChars8    = nullptr;
    182    fNeedsAltInput    = false;
    183    fNamedCaptureMap  = nullptr;
    184 
    185    fPattern          = nullptr; // will be set later
    186    fPatternString    = nullptr; // may be set later
    187    fCompiledPat      = new UVector64(fDeferredStatus);
    188    fGroupMap         = new UVector32(fDeferredStatus);
    189    fSets             = new UVector(fDeferredStatus);
    190    fInitialChars     = new UnicodeSet;
    191    fInitialChars8    = new Regex8BitSet;
    192    if (U_FAILURE(fDeferredStatus)) {
    193        return;
    194    }
    195    if (fCompiledPat == nullptr  || fGroupMap == nullptr || fSets == nullptr ||
    196            fInitialChars == nullptr || fInitialChars8 == nullptr) {
    197        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    198        return;
    199    }
    200 
    201    // Slot zero of the vector of sets is reserved.  Fill it here.
    202    fSets->addElement(static_cast<int32_t>(0), fDeferredStatus);
    203 }
    204 
    205 
    206 bool RegexPattern::initNamedCaptureMap() {
    207    if (fNamedCaptureMap) {
    208        return true;
    209    }
    210    fNamedCaptureMap  = uhash_openSize(uhash_hashUnicodeString,     // Key hash function
    211                                       uhash_compareUnicodeString,  // Key comparator function
    212                                       uhash_compareLong,           // Value comparator function
    213                                       7,                           // Initial table capacity
    214                                       &fDeferredStatus);
    215    if (U_FAILURE(fDeferredStatus)) {
    216        return false;
    217    }
    218 
    219    // fNamedCaptureMap owns its key strings, type (UnicodeString *)
    220    uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
    221    return true;
    222 }
    223 
    224 //--------------------------------------------------------------------------
    225 //
    226 //   zap            Delete everything owned by this RegexPattern.
    227 //
    228 //--------------------------------------------------------------------------
    229 void RegexPattern::zap() {
    230    delete fCompiledPat;
    231    fCompiledPat = nullptr;
    232    int i;
    233    for (i=1; i<fSets->size(); i++) {
    234        UnicodeSet *s;
    235        s = static_cast<UnicodeSet*>(fSets->elementAt(i));
    236        delete s;
    237    }
    238    delete fSets;
    239    fSets = nullptr;
    240    delete[] fSets8;
    241    fSets8 = nullptr;
    242    delete fGroupMap;
    243    fGroupMap = nullptr;
    244    delete fInitialChars;
    245    fInitialChars = nullptr;
    246    delete fInitialChars8;
    247    fInitialChars8 = nullptr;
    248    if (fPattern != nullptr) {
    249        utext_close(fPattern);
    250        fPattern = nullptr;
    251    }
    252    if (fPatternString != nullptr) {
    253        delete fPatternString;
    254        fPatternString = nullptr;
    255    }
    256    if (fNamedCaptureMap != nullptr) {
    257        uhash_close(fNamedCaptureMap);
    258        fNamedCaptureMap = nullptr;
    259    }
    260 }
    261 
    262 
    263 //--------------------------------------------------------------------------
    264 //
    265 //   Destructor
    266 //
    267 //--------------------------------------------------------------------------
    268 RegexPattern::~RegexPattern() {
    269    zap();
    270 }
    271 
    272 
    273 //--------------------------------------------------------------------------
    274 //
    275 //   Clone
    276 //
    277 //--------------------------------------------------------------------------
    278 RegexPattern  *RegexPattern::clone() const {
    279    RegexPattern  *copy = new RegexPattern(*this);
    280    return copy;
    281 }
    282 
    283 
    284 //--------------------------------------------------------------------------
    285 //
    286 //   operator ==   (comparison)    Consider two patterns to be == if the
    287 //                                 pattern strings and the flags are the same.
    288 //                                 Note that pattern strings with the same
    289 //                                 characters can still be considered different.
    290 //
    291 //--------------------------------------------------------------------------
    292 bool    RegexPattern::operator ==(const RegexPattern &other) const {
    293    if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
    294        if (this->fPatternString != nullptr && other.fPatternString != nullptr) {
    295            return *(this->fPatternString) == *(other.fPatternString);
    296        } else if (this->fPattern == nullptr) {
    297            if (other.fPattern == nullptr) {
    298                return true;
    299            }
    300        } else if (other.fPattern != nullptr) {
    301            UTEXT_SETNATIVEINDEX(this->fPattern, 0);
    302            UTEXT_SETNATIVEINDEX(other.fPattern, 0);
    303            return utext_equals(this->fPattern, other.fPattern);
    304        }
    305    }
    306    return false;
    307 }
    308 
    309 //---------------------------------------------------------------------
    310 //
    311 //   compile
    312 //
    313 //---------------------------------------------------------------------
    314 RegexPattern * U_EXPORT2
    315 RegexPattern::compile(const UnicodeString &regex,
    316                      uint32_t             flags,
    317                      UParseError          &pe,
    318                      UErrorCode           &status)
    319 {
    320    if (U_FAILURE(status)) {
    321        return nullptr;
    322    }
    323 
    324    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    325    UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    326    UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    327 
    328    if ((flags & ~allFlags) != 0) {
    329        status = U_REGEX_INVALID_FLAG;
    330        return nullptr;
    331    }
    332 
    333    if ((flags & UREGEX_CANON_EQ) != 0) {
    334        status = U_REGEX_UNIMPLEMENTED;
    335        return nullptr;
    336    }
    337 
    338    RegexPattern *This = new RegexPattern;
    339    if (This == nullptr) {
    340        status = U_MEMORY_ALLOCATION_ERROR;
    341        return nullptr;
    342    }
    343    if (U_FAILURE(This->fDeferredStatus)) {
    344        status = This->fDeferredStatus;
    345        delete This;
    346        return nullptr;
    347    }
    348    This->fFlags = flags;
    349 
    350    RegexCompile     compiler(This, status);
    351    compiler.compile(regex, pe, status);
    352 
    353    if (U_FAILURE(status)) {
    354        delete This;
    355        This = nullptr;
    356    }
    357 
    358    return This;
    359 }
    360 
    361 
    362 //
    363 //   compile, UText mode
    364 //
    365 RegexPattern * U_EXPORT2
    366 RegexPattern::compile(UText                *regex,
    367                      uint32_t             flags,
    368                      UParseError          &pe,
    369                      UErrorCode           &status)
    370 {
    371    if (U_FAILURE(status)) {
    372        return nullptr;
    373    }
    374 
    375    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    376                              UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    377                              UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
    378 
    379    if ((flags & ~allFlags) != 0) {
    380        status = U_REGEX_INVALID_FLAG;
    381        return nullptr;
    382    }
    383 
    384    if ((flags & UREGEX_CANON_EQ) != 0) {
    385        status = U_REGEX_UNIMPLEMENTED;
    386        return nullptr;
    387    }
    388 
    389    RegexPattern *This = new RegexPattern;
    390    if (This == nullptr) {
    391        status = U_MEMORY_ALLOCATION_ERROR;
    392        return nullptr;
    393    }
    394    if (U_FAILURE(This->fDeferredStatus)) {
    395        status = This->fDeferredStatus;
    396        delete This;
    397        return nullptr;
    398    }
    399    This->fFlags = flags;
    400 
    401    RegexCompile     compiler(This, status);
    402    compiler.compile(regex, pe, status);
    403 
    404    if (U_FAILURE(status)) {
    405        delete This;
    406        This = nullptr;
    407    }
    408 
    409    return This;
    410 }
    411 
    412 //
    413 //   compile with default flags.
    414 //
    415 RegexPattern * U_EXPORT2
    416 RegexPattern::compile(const UnicodeString &regex,
    417                      UParseError         &pe,
    418                      UErrorCode          &err)
    419 {
    420    return compile(regex, 0, pe, err);
    421 }
    422 
    423 
    424 //
    425 //   compile with default flags, UText mode
    426 //
    427 RegexPattern * U_EXPORT2
    428 RegexPattern::compile(UText               *regex,
    429                      UParseError         &pe,
    430                      UErrorCode          &err)
    431 {
    432    return compile(regex, 0, pe, err);
    433 }
    434 
    435 
    436 //
    437 //   compile with no UParseError parameter.
    438 //
    439 RegexPattern * U_EXPORT2
    440 RegexPattern::compile(const UnicodeString &regex,
    441                      uint32_t             flags,
    442                      UErrorCode          &err)
    443 {
    444    UParseError pe;
    445    return compile(regex, flags, pe, err);
    446 }
    447 
    448 
    449 //
    450 //   compile with no UParseError parameter, UText mode
    451 //
    452 RegexPattern * U_EXPORT2
    453 RegexPattern::compile(UText                *regex,
    454                      uint32_t             flags,
    455                      UErrorCode           &err)
    456 {
    457    UParseError pe;
    458    return compile(regex, flags, pe, err);
    459 }
    460 
    461 
    462 //---------------------------------------------------------------------
    463 //
    464 //   flags
    465 //
    466 //---------------------------------------------------------------------
    467 uint32_t RegexPattern::flags() const {
    468    return fFlags;
    469 }
    470 
    471 
    472 //---------------------------------------------------------------------
    473 //
    474 //   matcher(UnicodeString, err)
    475 //
    476 //---------------------------------------------------------------------
    477 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
    478                                    UErrorCode          &status)  const {
    479    RegexMatcher    *retMatcher = matcher(status);
    480    if (retMatcher != nullptr) {
    481        retMatcher->fDeferredStatus = status;
    482        retMatcher->reset(input);
    483    }
    484    return retMatcher;
    485 }
    486 
    487 
    488 //---------------------------------------------------------------------
    489 //
    490 //   matcher(status)
    491 //
    492 //---------------------------------------------------------------------
    493 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
    494    RegexMatcher    *retMatcher = nullptr;
    495 
    496    if (U_FAILURE(status)) {
    497        return nullptr;
    498    }
    499    if (U_FAILURE(fDeferredStatus)) {
    500        status = fDeferredStatus;
    501        return nullptr;
    502    }
    503 
    504    retMatcher = new RegexMatcher(this);
    505    if (retMatcher == nullptr) {
    506        status = U_MEMORY_ALLOCATION_ERROR;
    507        return nullptr;
    508    }
    509    return retMatcher;
    510 }
    511 
    512 
    513 
    514 //---------------------------------------------------------------------
    515 //
    516 //   matches        Convenience function to test for a match, starting
    517 //                  with a pattern string and a data string.
    518 //
    519 //---------------------------------------------------------------------
    520 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
    521              const UnicodeString   &input,
    522                    UParseError     &pe,
    523                    UErrorCode      &status) {
    524 
    525    if (U_FAILURE(status)) {return false;}
    526 
    527    UBool         retVal;
    528    RegexPattern *pat     = nullptr;
    529    RegexMatcher *matcher = nullptr;
    530 
    531    pat     = RegexPattern::compile(regex, 0, pe, status);
    532    matcher = pat->matcher(input, status);
    533    retVal  = matcher->matches(status);
    534 
    535    delete matcher;
    536    delete pat;
    537    return retVal;
    538 }
    539 
    540 
    541 //
    542 //   matches, UText mode
    543 //
    544 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
    545                    UText           *input,
    546                    UParseError     &pe,
    547                    UErrorCode      &status) {
    548 
    549    if (U_FAILURE(status)) {return false;}
    550 
    551    UBool         retVal  = false;
    552    RegexPattern *pat     = nullptr;
    553    RegexMatcher *matcher = nullptr;
    554 
    555    pat     = RegexPattern::compile(regex, 0, pe, status);
    556    matcher = pat->matcher(status);
    557    if (U_SUCCESS(status)) {
    558        matcher->reset(input);
    559        retVal  = matcher->matches(status);
    560    }
    561 
    562    delete matcher;
    563    delete pat;
    564    return retVal;
    565 }
    566 
    567 
    568 
    569 
    570 
    571 //---------------------------------------------------------------------
    572 //
    573 //   pattern
    574 //
    575 //---------------------------------------------------------------------
    576 UnicodeString RegexPattern::pattern() const {
    577    if (fPatternString != nullptr) {
    578        return *fPatternString;
    579    } else if (fPattern == nullptr) {
    580        return {};
    581    } else {
    582        UErrorCode status = U_ZERO_ERROR;
    583        int64_t nativeLen = utext_nativeLength(fPattern);
    584        int32_t len16 = utext_extract(fPattern, 0, nativeLen, nullptr, 0, &status); // buffer overflow error
    585        UnicodeString result;
    586 
    587        status = U_ZERO_ERROR;
    588        char16_t *resultChars = result.getBuffer(len16);
    589        utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
    590        result.releaseBuffer(len16);
    591 
    592        return result;
    593    }
    594 }
    595 
    596 
    597 
    598 
    599 //---------------------------------------------------------------------
    600 //
    601 //   patternText
    602 //
    603 //---------------------------------------------------------------------
    604 UText *RegexPattern::patternText(UErrorCode      &status) const {
    605    if (U_FAILURE(status)) {return nullptr;}
    606    status = U_ZERO_ERROR;
    607 
    608    if (fPattern != nullptr) {
    609        return fPattern;
    610    } else {
    611        RegexStaticSets::initGlobals(&status);
    612        return RegexStaticSets::gStaticSets->fEmptyText;
    613    }
    614 }
    615 
    616 
    617 //--------------------------------------------------------------------------------
    618 //
    619 //  groupNumberFromName()
    620 //
    621 //--------------------------------------------------------------------------------
    622 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
    623    if (U_FAILURE(status)) {
    624        return 0;
    625    }
    626 
    627    // No need to explicitly check for syntactically valid names.
    628    // Invalid ones will never be in the map, and the lookup will fail.
    629 
    630    int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
    631    if (number == 0) {
    632        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
    633    }
    634    return number;
    635 }
    636 
    637 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
    638    if (U_FAILURE(status)) {
    639        return 0;
    640    }
    641    UnicodeString name(groupName, nameLength, US_INV);
    642    return groupNumberFromName(name, status);
    643 }
    644 
    645 
    646 //---------------------------------------------------------------------
    647 //
    648 //   split
    649 //
    650 //---------------------------------------------------------------------
    651 int32_t  RegexPattern::split(const UnicodeString &input,
    652        UnicodeString    dest[],
    653        int32_t          destCapacity,
    654        UErrorCode      &status) const
    655 {
    656    if (U_FAILURE(status)) {
    657        return 0;
    658    }
    659 
    660    RegexMatcher  m(this);
    661    int32_t r = 0;
    662    // Check m's status to make sure all is ok.
    663    if (U_SUCCESS(m.fDeferredStatus)) {
    664    	r = m.split(input, dest, destCapacity, status);
    665    }
    666    return r;
    667 }
    668 
    669 //
    670 //   split, UText mode
    671 //
    672 int32_t  RegexPattern::split(UText *input,
    673        UText           *dest[],
    674        int32_t          destCapacity,
    675        UErrorCode      &status) const
    676 {
    677    if (U_FAILURE(status)) {
    678        return 0;
    679    }
    680 
    681    RegexMatcher  m(this);
    682    int32_t r = 0;
    683    // Check m's status to make sure all is ok.
    684    if (U_SUCCESS(m.fDeferredStatus)) {
    685    	r = m.split(input, dest, destCapacity, status);
    686    }
    687    return r;
    688 }
    689 
    690 
    691 //---------------------------------------------------------------------
    692 //
    693 //   dump    Output the compiled form of the pattern.
    694 //           Debugging function only.
    695 //
    696 //---------------------------------------------------------------------
    697 void   RegexPattern::dumpOp(int32_t index) const {
    698    (void)index;  // Suppress warnings in non-debug build.
    699 #if defined(REGEX_DEBUG)
    700    static const char * const opNames[] = {URX_OPCODE_NAMES};
    701    int32_t op          = fCompiledPat->elementAti(index);
    702    int32_t val         = URX_VAL(op);
    703    int32_t type        = URX_TYPE(op);
    704    int32_t pinnedType  = type;
    705    if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
    706        pinnedType = 0;
    707    }
    708 
    709    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    710    switch (type) {
    711    case URX_NOP:
    712    case URX_DOTANY:
    713    case URX_DOTANY_ALL:
    714    case URX_FAIL:
    715    case URX_CARET:
    716    case URX_DOLLAR:
    717    case URX_BACKSLASH_G:
    718    case URX_BACKSLASH_X:
    719    case URX_END:
    720    case URX_DOLLAR_M:
    721    case URX_CARET_M:
    722        // Types with no operand field of interest.
    723        break;
    724 
    725    case URX_RESERVED_OP:
    726    case URX_START_CAPTURE:
    727    case URX_END_CAPTURE:
    728    case URX_STATE_SAVE:
    729    case URX_JMP:
    730    case URX_JMP_SAV:
    731    case URX_JMP_SAV_X:
    732    case URX_BACKSLASH_B:
    733    case URX_BACKSLASH_BU:
    734    case URX_BACKSLASH_D:
    735    case URX_BACKSLASH_Z:
    736    case URX_STRING_LEN:
    737    case URX_CTR_INIT:
    738    case URX_CTR_INIT_NG:
    739    case URX_CTR_LOOP:
    740    case URX_CTR_LOOP_NG:
    741    case URX_RELOC_OPRND:
    742    case URX_STO_SP:
    743    case URX_LD_SP:
    744    case URX_BACKREF:
    745    case URX_STO_INP_LOC:
    746    case URX_JMPX:
    747    case URX_LA_START:
    748    case URX_LA_END:
    749    case URX_BACKREF_I:
    750    case URX_LB_START:
    751    case URX_LB_CONT:
    752    case URX_LB_END:
    753    case URX_LBN_CONT:
    754    case URX_LBN_END:
    755    case URX_LOOP_C:
    756    case URX_LOOP_DOT_I:
    757    case URX_BACKSLASH_H:
    758    case URX_BACKSLASH_R:
    759    case URX_BACKSLASH_V:
    760        // types with an integer operand field.
    761        printf("%d", val);
    762        break;
    763 
    764    case URX_ONECHAR:
    765    case URX_ONECHAR_I:
    766        if (val < 0x20) {
    767            printf("%#x", val);
    768        } else {
    769            printf("'%s'", CStr(UnicodeString(val))());
    770        }
    771        break;
    772 
    773    case URX_STRING:
    774    case URX_STRING_I:
    775        {
    776            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
    777            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
    778            int32_t length = URX_VAL(lengthOp);
    779            UnicodeString str(fLiteralText, val, length);
    780            printf("%s", CStr(str)());
    781        }
    782        break;
    783 
    784    case URX_SETREF:
    785    case URX_LOOP_SR_I:
    786        {
    787            UnicodeString s;
    788            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
    789            set->toPattern(s, true);
    790            printf("%s", CStr(s)());
    791        }
    792        break;
    793 
    794    case URX_STATIC_SETREF:
    795    case URX_STAT_SETREF_N:
    796        {
    797            UnicodeString s;
    798            if (val & URX_NEG_SET) {
    799                printf("NOT ");
    800                val &= ~URX_NEG_SET;
    801            }
    802            UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val];
    803            set.toPattern(s, true);
    804            printf("%s", CStr(s)());
    805        }
    806        break;
    807 
    808 
    809    default:
    810        printf("??????");
    811        break;
    812    }
    813    printf("\n");
    814 #endif
    815 }
    816 
    817 
    818 void RegexPattern::dumpPattern() const {
    819 #if defined(REGEX_DEBUG)
    820    int      index;
    821 
    822    UnicodeString patStr;
    823    for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
    824        patStr.append(c);
    825    }
    826    printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
    827    printf("   Min Match Length:  %d\n", fMinMatchLen);
    828    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
    829    if (fStartType == START_STRING) {
    830        UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
    831        printf("   Initial match string: \"%s\"\n", CStr(initialString)());
    832    } else if (fStartType == START_SET) {
    833        UnicodeString s;
    834        fInitialChars->toPattern(s, true);
    835        printf("    Match First Chars: %s\n", CStr(s)());
    836 
    837    } else if (fStartType == START_CHAR) {
    838        printf("    First char of Match: ");
    839        if (fInitialChar > 0x20) {
    840                printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
    841            } else {
    842                printf("%#x\n", fInitialChar);
    843            }
    844    }
    845 
    846    printf("Named Capture Groups:\n");
    847    if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
    848        printf("   None\n");
    849    } else {
    850        int32_t pos = UHASH_FIRST;
    851        const UHashElement *el = nullptr;
    852        while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
    853            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
    854            int32_t number = el->value.integer;
    855            printf("   %d\t%s\n", number, CStr(*name)());
    856        }
    857    }
    858 
    859    printf("\nIndex   Binary     Type             Operand\n" \
    860           "-------------------------------------------\n");
    861    for (index = 0; index<fCompiledPat->size(); index++) {
    862        dumpOp(index);
    863    }
    864    printf("\n\n");
    865 #endif
    866 }
    867 
    868 
    869 
    870 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
    871 
    872 U_NAMESPACE_END
    873 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS