tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbbisetb.cpp (25394B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  rbbisetb.cpp
      5 //
      6 /*
      7 ***************************************************************************
      8 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
      9 *   and others. All rights reserved.                                      *
     10 ***************************************************************************
     11 */
     12 //
     13 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
     14 //                   (part of the rule building process.)
     15 //
     16 //      Starting with the rules parse tree from the scanner,
     17 //
     18 //                   -  Enumerate the set of UnicodeSets that are referenced
     19 //                      by the RBBI rules.
     20 //                   -  compute a set of non-overlapping character ranges
     21 //                      with all characters within a range belonging to the same
     22 //                      set of input unicode sets.
     23 //                   -  Derive a set of non-overlapping UnicodeSet (like things)
     24 //                      that will correspond to columns in the state table for
     25 //                      the RBBI execution engine.  All characters within one
     26 //                      of these sets belong to the same set of the original
     27 //                      UnicodeSets from the user's rules.
     28 //                   -  construct the trie table that maps input characters
     29 //                      to the index of the matching non-overlapping set of sets from
     30 //                      the previous step.
     31 //
     32 
     33 #include "unicode/utypes.h"
     34 
     35 #if !UCONFIG_NO_BREAK_ITERATION
     36 
     37 #include "unicode/uniset.h"
     38 #include "uvector.h"
     39 #include "uassert.h"
     40 #include "cmemory.h"
     41 #include "cstring.h"
     42 
     43 #include "rbbisetb.h"
     44 #include "rbbinode.h"
     45 
     46 U_NAMESPACE_BEGIN
     47 
     48 const int32_t kMaxCharCategoriesFor8BitsTrie = 255;
     49 //------------------------------------------------------------------------
     50 //
     51 //   Constructor
     52 //
     53 //------------------------------------------------------------------------
     54 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
     55 {
     56    fRB             = rb;
     57    fStatus         = rb->fStatus;
     58    fRangeList      = nullptr;
     59    fMutableTrie    = nullptr;
     60    fTrie           = nullptr;
     61    fTrieSize       = 0;
     62    fGroupCount     = 0;
     63    fSawBOF         = false;
     64 }
     65 
     66 
     67 //------------------------------------------------------------------------
     68 //
     69 //   Destructor
     70 //
     71 //------------------------------------------------------------------------
     72 RBBISetBuilder::~RBBISetBuilder()
     73 {
     74    RangeDescriptor   *nextRangeDesc;
     75 
     76    // Walk through & delete the linked list of RangeDescriptors
     77    for (nextRangeDesc = fRangeList; nextRangeDesc!=nullptr;) {
     78        RangeDescriptor *r = nextRangeDesc;
     79        nextRangeDesc      = r->fNext;
     80        delete r;
     81    }
     82 
     83    ucptrie_close(fTrie);
     84    umutablecptrie_close(fMutableTrie);
     85 }
     86 
     87 
     88 
     89 
     90 //------------------------------------------------------------------------
     91 //
     92 //   build          Build the list of non-overlapping character ranges
     93 //                  from the Unicode Sets.
     94 //
     95 //------------------------------------------------------------------------
     96 void RBBISetBuilder::buildRanges() {
     97    RBBINode        *usetNode;
     98    RangeDescriptor *rlRange;
     99 
    100    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
    101 
    102    //
    103    //  Initialize the process by creating a single range encompassing all characters
    104    //  that is in no sets.
    105    //
    106    fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
    107    if (fRangeList == nullptr) {
    108        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    109        return;
    110    }
    111    fRangeList->fStartChar    = 0;
    112    fRangeList->fEndChar      = 0x10ffff;
    113 
    114    if (U_FAILURE(*fStatus)) {
    115        return;
    116    }
    117 
    118    //
    119    //  Find the set of non-overlapping ranges of characters
    120    //
    121    int  ni;
    122    for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    123        usetNode = static_cast<RBBINode*>(this->fRB->fUSetNodes->elementAt(ni));
    124        if (usetNode==nullptr) {
    125            break;
    126        }
    127 
    128        UnicodeSet      *inputSet             = usetNode->fInputSet;
    129        int32_t          inputSetRangeCount   = inputSet->getRangeCount();
    130        int              inputSetRangeIndex   = 0;
    131                         rlRange              = fRangeList;
    132 
    133        for (;;) {
    134            if (inputSetRangeIndex >= inputSetRangeCount) {
    135                break;
    136            }
    137            UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
    138            UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
    139 
    140            // skip over ranges from the range list that are completely
    141            //   below the current range from the input unicode set.
    142            while (rlRange->fEndChar < inputSetRangeBegin) {
    143                rlRange = rlRange->fNext;
    144            }
    145 
    146            // If the start of the range from the range list is before with
    147            //   the start of the range from the unicode set, split the range list range
    148            //   in two, with one part being before (wholly outside of) the unicode set
    149            //   and the other containing the rest.
    150            //   Then continue the loop; the post-split current range will then be skipped
    151            //     over
    152            if (rlRange->fStartChar < inputSetRangeBegin) {
    153                rlRange->split(inputSetRangeBegin, *fStatus);
    154                if (U_FAILURE(*fStatus)) {
    155                    return;
    156                }
    157                continue;
    158            }
    159 
    160            // Same thing at the end of the ranges...
    161            // If the end of the range from the range list doesn't coincide with
    162            //   the end of the range from the unicode set, split the range list
    163            //   range in two.  The first part of the split range will be
    164            //   wholly inside the Unicode set.
    165            if (rlRange->fEndChar > inputSetRangeEnd) {
    166                rlRange->split(inputSetRangeEnd+1, *fStatus);
    167                if (U_FAILURE(*fStatus)) {
    168                    return;
    169                }
    170            }
    171 
    172            // The current rlRange is now entirely within the UnicodeSet range.
    173            // Add this unicode set to the list of sets for this rlRange
    174            if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
    175                rlRange->fIncludesSets->addElement(usetNode, *fStatus);
    176                if (U_FAILURE(*fStatus)) {
    177                    return;
    178                }
    179            }
    180 
    181            // Advance over ranges that we are finished with.
    182            if (inputSetRangeEnd == rlRange->fEndChar) {
    183                inputSetRangeIndex++;
    184            }
    185            rlRange = rlRange->fNext;
    186        }
    187    }
    188 
    189    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
    190 
    191    //
    192    //  Group the above ranges, with each group consisting of one or more
    193    //    ranges that are in exactly the same set of original UnicodeSets.
    194    //    The groups are numbered, and these group numbers are the set of
    195    //    input symbols recognized by the run-time state machine.
    196    //
    197    //    Numbering: # 0  (state table column 0) is unused.
    198    //               # 1  is reserved - table column 1 is for end-of-input
    199    //               # 2  is reserved - table column 2 is for beginning-of-input
    200    //               # 3  is the first range list.
    201    //
    202    RangeDescriptor *rlSearchRange;
    203    int32_t dictGroupCount = 0;
    204 
    205    for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
    206        for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
    207            if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
    208                rlRange->fNum = rlSearchRange->fNum;
    209                rlRange->fIncludesDict = rlSearchRange->fIncludesDict;
    210                break;
    211            }
    212        }
    213        if (rlRange->fNum == 0) {
    214            rlRange->fFirstInGroup = true;
    215            if (rlRange->isDictionaryRange()) {
    216                rlRange->fNum = ++dictGroupCount;
    217                rlRange->fIncludesDict = true;
    218            } else {
    219                fGroupCount++;
    220                rlRange->fNum = fGroupCount+2;
    221                addValToSets(rlRange->fIncludesSets, rlRange->fNum);
    222            }
    223        }
    224    }
    225 
    226    // Move the character category numbers for any dictionary ranges up, so that they
    227    // immediately follow the non-dictionary ranges.
    228 
    229    fDictCategoriesStart = fGroupCount + 3;
    230    for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
    231        if (rlRange->fIncludesDict) {
    232            rlRange->fNum += fDictCategoriesStart - 1;
    233            if (rlRange->fFirstInGroup) {
    234                addValToSets(rlRange->fIncludesSets, rlRange->fNum);
    235            }
    236        }
    237    }
    238    fGroupCount += dictGroupCount;
    239 
    240 
    241    // Handle input sets that contain the special string {eof}.
    242    //   Column 1 of the state table is reserved for EOF on input.
    243    //   Column 2 is reserved for before-the-start-input.
    244    //            (This column can be optimized away later if there are no rule
    245    //             references to {bof}.)
    246    //   Add this column value (1 or 2) to the equivalent expression
    247    //     subtree for each UnicodeSet that contains the string {eof}
    248    //   Because {bof} and {eof} are not characters in the normal sense,
    249    //   they don't affect the computation of the ranges or TRIE.
    250 
    251    UnicodeString eofString(u"eof");
    252    UnicodeString bofString(u"bof");
    253    for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    254        usetNode = static_cast<RBBINode*>(this->fRB->fUSetNodes->elementAt(ni));
    255        if (usetNode==nullptr) {
    256            break;
    257        }
    258        UnicodeSet      *inputSet = usetNode->fInputSet;
    259        if (inputSet->contains(eofString)) {
    260            addValToSet(usetNode, 1);
    261        }
    262        if (inputSet->contains(bofString)) {
    263            addValToSet(usetNode, 2);
    264            fSawBOF = true;
    265        }
    266    }
    267 
    268 
    269    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
    270    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
    271 }
    272 
    273 
    274 //
    275 // Build the Trie table for mapping UChar32 values to the corresponding
    276 // range group number.
    277 //
    278 void RBBISetBuilder::buildTrie() {
    279    fMutableTrie = umutablecptrie_open(
    280                        0,       //  Initial value for all code points.
    281                        0,       //  Error value for out-of-range input.
    282                        fStatus);
    283 
    284    for (RangeDescriptor *range = fRangeList; range!=nullptr && U_SUCCESS(*fStatus); range=range->fNext) {
    285        umutablecptrie_setRange(fMutableTrie,
    286                                range->fStartChar,     // Range start
    287                                range->fEndChar,       // Range end (inclusive)
    288                                range->fNum,           // value for range
    289                                fStatus);
    290    }
    291 }
    292 
    293 
    294 void RBBISetBuilder::mergeCategories(IntPair categories) {
    295    U_ASSERT(categories.first >= 1);
    296    U_ASSERT(categories.second > categories.first);
    297    U_ASSERT((categories.first <  fDictCategoriesStart && categories.second <  fDictCategoriesStart) ||
    298             (categories.first >= fDictCategoriesStart && categories.second >= fDictCategoriesStart));
    299 
    300    for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
    301        int32_t rangeNum = rd->fNum;
    302        if (rangeNum == categories.second) {
    303            rd->fNum = categories.first;
    304        } else if (rangeNum > categories.second) {
    305            rd->fNum--;
    306        }
    307    }
    308    --fGroupCount;
    309    if (categories.second <= fDictCategoriesStart) {
    310        --fDictCategoriesStart;
    311    }
    312 }
    313 
    314 
    315 //-----------------------------------------------------------------------------------
    316 //
    317 //  getTrieSize()    Return the size that will be required to serialize the Trie.
    318 //
    319 //-----------------------------------------------------------------------------------
    320 int32_t RBBISetBuilder::getTrieSize()  {
    321    if (U_FAILURE(*fStatus)) {
    322        return 0;
    323    }
    324    if (fTrie == nullptr) {
    325        bool use8Bits = getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie;
    326        fTrie = umutablecptrie_buildImmutable(
    327            fMutableTrie,
    328            UCPTRIE_TYPE_FAST,
    329            use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16,
    330            fStatus);
    331        UErrorCode bufferStatus = *fStatus;
    332        fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, &bufferStatus);
    333        if (bufferStatus != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(bufferStatus)) {
    334            *fStatus = bufferStatus;
    335        }
    336    }
    337    return fTrieSize;
    338 }
    339 
    340 
    341 //-----------------------------------------------------------------------------------
    342 //
    343 //  serializeTrie()   Put the serialized trie at the specified address.
    344 //                    Trust the caller to have given us enough memory.
    345 //                    getTrieSize() MUST be called first.
    346 //
    347 //-----------------------------------------------------------------------------------
    348 void RBBISetBuilder::serializeTrie(uint8_t *where) {
    349    ucptrie_toBinary(fTrie,
    350                     where,                // Buffer
    351                     fTrieSize,            // Capacity
    352                     fStatus);
    353 }
    354 
    355 //------------------------------------------------------------------------
    356 //
    357 //  addValToSets     Add a runtime-mapped input value to each uset from a
    358 //                   list of uset nodes. (val corresponds to a state table column.)
    359 //                   For each of the original Unicode sets - which correspond
    360 //                   directly to uset nodes - a logically equivalent expression
    361 //                   is constructed in terms of the remapped runtime input
    362 //                   symbol set.  This function adds one runtime input symbol to
    363 //                   a list of sets.
    364 //
    365 //                   The "logically equivalent expression" is the tree for an
    366 //                   or-ing together of all of the symbols that go into the set.
    367 //
    368 //------------------------------------------------------------------------
    369 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
    370    int32_t       ix;
    371 
    372    for (ix=0; ix<sets->size(); ix++) {
    373        RBBINode* usetNode = static_cast<RBBINode*>(sets->elementAt(ix));
    374        addValToSet(usetNode, val);
    375    }
    376 }
    377 
    378 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
    379    RBBINode *leafNode = new RBBINode(RBBINode::leafChar, *fStatus);
    380    if (U_FAILURE(*fStatus)) {
    381        delete leafNode;
    382        return;
    383    }
    384    if (leafNode == nullptr) {
    385        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    386        return;
    387    }
    388    leafNode->fVal = static_cast<unsigned short>(val);
    389    if (usetNode->fLeftChild == nullptr) {
    390        usetNode->fLeftChild = leafNode;
    391        leafNode->fParent    = usetNode;
    392    } else {
    393        // There are already input symbols present for this set.
    394        // Set up an OR node, with the previous stuff as the left child
    395        //   and the new value as the right child.
    396        RBBINode *orNode = new RBBINode(RBBINode::opOr, *fStatus);
    397        if (orNode == nullptr) {
    398            *fStatus = U_MEMORY_ALLOCATION_ERROR;
    399        }
    400        if (U_FAILURE(*fStatus)) {
    401            delete orNode;
    402            delete leafNode;
    403            return;
    404        }
    405        orNode->fLeftChild  = usetNode->fLeftChild;
    406        orNode->fRightChild = leafNode;
    407        orNode->fLeftChild->fParent  = orNode;
    408        orNode->fRightChild->fParent = orNode;
    409        usetNode->fLeftChild = orNode;
    410        orNode->fParent = usetNode;
    411    }
    412 }
    413 
    414 
    415 //------------------------------------------------------------------------
    416 //
    417 //   getNumCharCategories
    418 //
    419 //------------------------------------------------------------------------
    420 int32_t  RBBISetBuilder::getNumCharCategories() const {
    421    return fGroupCount + 3;
    422 }
    423 
    424 
    425 //------------------------------------------------------------------------
    426 //
    427 //   getDictCategoriesStart
    428 //
    429 //------------------------------------------------------------------------
    430 int32_t  RBBISetBuilder::getDictCategoriesStart() const {
    431    return fDictCategoriesStart;
    432 }
    433 
    434 
    435 //------------------------------------------------------------------------
    436 //
    437 //   sawBOF
    438 //
    439 //------------------------------------------------------------------------
    440 UBool  RBBISetBuilder::sawBOF() const {
    441    return fSawBOF;
    442 }
    443 
    444 
    445 //------------------------------------------------------------------------
    446 //
    447 //   getFirstChar      Given a runtime RBBI character category, find
    448 //                     the first UChar32 that is in the set of chars 
    449 //                     in the category.
    450 //------------------------------------------------------------------------
    451 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
    452    RangeDescriptor   *rlRange;
    453    UChar32 retVal = static_cast<UChar32>(-1);
    454    for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
    455        if (rlRange->fNum == category) {
    456            retVal = rlRange->fStartChar;
    457            break;
    458        }
    459    }
    460    return retVal;
    461 }
    462 
    463 
    464 //------------------------------------------------------------------------
    465 //
    466 //   printRanges        A debugging function.
    467 //                      dump out all of the range definitions.
    468 //
    469 //------------------------------------------------------------------------
    470 #ifdef RBBI_DEBUG
    471 void RBBISetBuilder::printRanges() {
    472    RangeDescriptor       *rlRange;
    473    int                    i;
    474 
    475    RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
    476    for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
    477        RBBIDebugPrintf("%4x-%4x  ", rlRange->fStartChar, rlRange->fEndChar);
    478 
    479        for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    480            RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    481            UnicodeString   setName {u"anon"};
    482            RBBINode       *setRef = usetNode->fParent;
    483            if (setRef != nullptr) {
    484                RBBINode *varRef = setRef->fParent;
    485                if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
    486                    setName = varRef->fText;
    487                }
    488            }
    489            RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
    490        }
    491        RBBIDebugPrintf("\n");
    492    }
    493 }
    494 #endif
    495 
    496 
    497 //------------------------------------------------------------------------
    498 //
    499 //   printRangeGroups     A debugging function.
    500 //                        dump out all of the range groups.
    501 //
    502 //------------------------------------------------------------------------
    503 #ifdef RBBI_DEBUG
    504 void RBBISetBuilder::printRangeGroups() {
    505    int                    i;
    506 
    507    RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
    508    for (RangeDescriptor *rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) {
    509        if (rlRange->fFirstInGroup) {
    510            int groupNum = rlRange->fNum;
    511            RBBIDebugPrintf("%2i  ", groupNum);
    512 
    513            if (groupNum >= fDictCategoriesStart) { RBBIDebugPrintf(" <DICT> ");}
    514 
    515            for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    516                RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    517                UnicodeString   setName = UNICODE_STRING("anon", 4);
    518                RBBINode       *setRef = usetNode->fParent;
    519                if (setRef != nullptr) {
    520                    RBBINode *varRef = setRef->fParent;
    521                    if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
    522                        setName = varRef->fText;
    523                    }
    524                }
    525                RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
    526            }
    527 
    528            i = 0;
    529            for (RangeDescriptor *tRange = rlRange; tRange != nullptr; tRange = tRange->fNext) {
    530                if (tRange->fNum == rlRange->fNum) {
    531                    if (i++ % 5 == 0) {
    532                        RBBIDebugPrintf("\n    ");
    533                    }
    534                    RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
    535                }
    536            }
    537            RBBIDebugPrintf("\n");
    538        }
    539    }
    540    RBBIDebugPrintf("\n");
    541 }
    542 #endif
    543 
    544 
    545 //------------------------------------------------------------------------
    546 //
    547 //   printSets          A debugging function.
    548 //                      dump out all of the set definitions.
    549 //
    550 //------------------------------------------------------------------------
    551 #ifdef RBBI_DEBUG
    552 void RBBISetBuilder::printSets() {
    553    int                   i;
    554 
    555    RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
    556    for (i=0; ; i++) {
    557        RBBINode        *usetNode;
    558        RBBINode        *setRef;
    559        RBBINode        *varRef;
    560        UnicodeString    setName;
    561 
    562        usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
    563        if (usetNode == nullptr) {
    564            break;
    565        }
    566 
    567        RBBIDebugPrintf("%3d    ", i);
    568        setName = UNICODE_STRING("anonymous", 9);
    569        setRef = usetNode->fParent;
    570        if (setRef != nullptr) {
    571            varRef = setRef->fParent;
    572            if (varRef != nullptr  &&  varRef->fType == RBBINode::varRef) {
    573                setName = varRef->fText;
    574            }
    575        }
    576        RBBI_DEBUG_printUnicodeString(setName);
    577        RBBIDebugPrintf("   ");
    578        RBBI_DEBUG_printUnicodeString(usetNode->fText);
    579        RBBIDebugPrintf("\n");
    580        if (usetNode->fLeftChild != nullptr) {
    581            RBBINode::printTree(usetNode->fLeftChild, true);
    582        }
    583    }
    584    RBBIDebugPrintf("\n");
    585 }
    586 #endif
    587 
    588 
    589 
    590 //-------------------------------------------------------------------------------------
    591 //
    592 //  RangeDescriptor copy constructor
    593 //
    594 //-------------------------------------------------------------------------------------
    595 
    596 RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) :
    597        fStartChar(other.fStartChar), fEndChar {other.fEndChar}, fNum {other.fNum},
    598        fIncludesDict{other.fIncludesDict}, fFirstInGroup{other.fFirstInGroup} {
    599 
    600    if (U_FAILURE(status)) {
    601        return;
    602    }
    603    fIncludesSets = new UVector(status);
    604    if (this->fIncludesSets == nullptr) {
    605        status = U_MEMORY_ALLOCATION_ERROR;
    606    }
    607    if (U_FAILURE(status)) {
    608        return;
    609    }
    610 
    611    for (int32_t i=0; i<other.fIncludesSets->size(); i++) {
    612        this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
    613    }
    614 }
    615 
    616 
    617 //-------------------------------------------------------------------------------------
    618 //
    619 //  RangeDesriptor default constructor
    620 //
    621 //-------------------------------------------------------------------------------------
    622 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
    623    if (U_FAILURE(status)) {
    624        return;
    625    }
    626    fIncludesSets = new UVector(status);
    627    if (fIncludesSets == nullptr) {
    628        status = U_MEMORY_ALLOCATION_ERROR;
    629    }
    630 }
    631 
    632 
    633 //-------------------------------------------------------------------------------------
    634 //
    635 //  RangeDesriptor Destructor
    636 //
    637 //-------------------------------------------------------------------------------------
    638 RangeDescriptor::~RangeDescriptor() {
    639    delete  fIncludesSets;
    640    fIncludesSets = nullptr;
    641 }
    642 
    643 //-------------------------------------------------------------------------------------
    644 //
    645 //  RangeDesriptor::split()
    646 //
    647 //-------------------------------------------------------------------------------------
    648 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
    649    U_ASSERT(where>fStartChar && where<=fEndChar);
    650    RangeDescriptor *nr = new RangeDescriptor(*this, status);
    651    if(nr == nullptr) {
    652        status = U_MEMORY_ALLOCATION_ERROR;
    653        return;
    654    }
    655    if (U_FAILURE(status)) {
    656        delete nr;
    657        return;
    658    }
    659    //  RangeDescriptor copy constructor copies all fields.
    660    //  Only need to update those that are different after the split.
    661    nr->fStartChar = where;
    662    this->fEndChar = where-1;
    663    nr->fNext      = this->fNext;
    664    this->fNext    = nr;
    665 }
    666 
    667 
    668 //-------------------------------------------------------------------------------------
    669 //
    670 //   RangeDescriptor::isDictionaryRange
    671 //
    672 //            Test whether this range includes characters from
    673 //            the original Unicode Set named "dictionary".
    674 //
    675 //            This function looks through the Unicode Sets that
    676 //            the range includes, checking for one named "dictionary"
    677 //
    678 //            TODO:  a faster way would be to find the set node for
    679 //                   "dictionary" just once, rather than looking it
    680 //                   up by name every time.
    681 //
    682 //-------------------------------------------------------------------------------------
    683 bool RangeDescriptor::isDictionaryRange() {
    684    static const char16_t *dictionary = u"dictionary";
    685    for (int32_t i=0; i<fIncludesSets->size(); i++) {
    686        RBBINode* usetNode = static_cast<RBBINode*>(fIncludesSets->elementAt(i));
    687        RBBINode *setRef = usetNode->fParent;
    688        if (setRef != nullptr) {
    689            RBBINode *varRef = setRef->fParent;
    690            if (varRef && varRef->fType == RBBINode::varRef) {
    691                const UnicodeString *setName = &varRef->fText;
    692                if (setName->compare(dictionary, -1) == 0) {
    693                    return true;
    694                }
    695            }
    696        }
    697    }
    698    return false;
    699 }
    700 
    701 U_NAMESPACE_END
    702 
    703 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */