tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbbitblb.cpp (65404B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (c) 2002-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 //
     10 //  rbbitblb.cpp
     11 //
     12 
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_BREAK_ITERATION
     17 
     18 #include "unicode/unistr.h"
     19 #include "rbbitblb.h"
     20 #include "rbbirb.h"
     21 #include "rbbiscan.h"
     22 #include "rbbisetb.h"
     23 #include "rbbidata.h"
     24 #include "cstring.h"
     25 #include "uassert.h"
     26 #include "uvectr32.h"
     27 #include "cmemory.h"
     28 
     29 U_NAMESPACE_BEGIN
     30 
     31 const int32_t kMaxStateFor8BitsTable = 255;
     32 
     33 RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
     34        fRB(rb),
     35        fTree(*rootNode),
     36        fStatus(&status),
     37        fDStates(nullptr),
     38        fSafeTable(nullptr) {
     39    if (U_FAILURE(status)) {
     40        return;
     41    }
     42    // fDStates is UVector<RBBIStateDescriptor *>
     43    fDStates = new UVector(status);
     44    if (U_SUCCESS(status) && fDStates == nullptr ) {
     45        status = U_MEMORY_ALLOCATION_ERROR;
     46    }
     47 }
     48 
     49 
     50 
     51 RBBITableBuilder::~RBBITableBuilder() {
     52    int i;
     53    for (i=0; i<fDStates->size(); i++) {
     54        delete static_cast<RBBIStateDescriptor*>(fDStates->elementAt(i));
     55    }
     56    delete fDStates;
     57    delete fSafeTable;
     58    delete fLookAheadRuleMap;
     59 }
     60 
     61 
     62 //-----------------------------------------------------------------------------
     63 //
     64 //   RBBITableBuilder::buildForwardTable  -  This is the main function for building
     65 //                               the DFA state transition table from the RBBI rules parse tree.
     66 //
     67 //-----------------------------------------------------------------------------
     68 void  RBBITableBuilder::buildForwardTable() {
     69 
     70    if (U_FAILURE(*fStatus)) {
     71        return;
     72    }
     73 
     74    // If there were no rules, just return.  This situation can easily arise
     75    //   for the reverse rules.
     76    if (fTree==nullptr) {
     77        return;
     78    }
     79 
     80    //
     81    // Walk through the tree, replacing any references to $variables with a copy of the
     82    //   parse tree for the substitution expression.
     83    //
     84    fTree = fTree->flattenVariables(*fStatus, 0);
     85    if (U_FAILURE(*fStatus)) {
     86        return;
     87    }
     88 #ifdef RBBI_DEBUG
     89    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
     90        RBBIDebugPuts("\nParse tree after flattening variable references.");
     91        RBBINode::printTree(fTree, true);
     92    }
     93 #endif
     94 
     95    //
     96    // If the rules contained any references to {bof} 
     97    //   add a {bof} <cat> <former root of tree> to the
     98    //   tree.  Means that all matches must start out with the 
     99    //   {bof} fake character.
    100    // 
    101    if (fRB->fSetBuilder->sawBOF()) {
    102        RBBINode *bofTop    = new RBBINode(RBBINode::opCat, *fStatus);
    103        if (bofTop == nullptr) {
    104            *fStatus = U_MEMORY_ALLOCATION_ERROR;
    105        }
    106        if (U_FAILURE(*fStatus)) {
    107            delete bofTop;
    108            return;
    109        }
    110        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar, *fStatus);
    111        // Delete and exit if memory allocation failed.
    112        if (bofLeaf == nullptr) {
    113            *fStatus = U_MEMORY_ALLOCATION_ERROR;
    114        }
    115        if (U_FAILURE(*fStatus)) {
    116            delete bofLeaf;
    117            delete bofTop;
    118            return;
    119        }
    120        bofTop->fLeftChild  = bofLeaf;
    121        bofTop->fRightChild = fTree;
    122        bofLeaf->fParent    = bofTop;
    123        bofLeaf->fVal       = 2;      // Reserved value for {bof}.
    124        fTree               = bofTop;
    125    }
    126 
    127    //
    128    // Add a unique right-end marker to the expression.
    129    //   Appears as a cat-node, left child being the original tree,
    130    //   right child being the end marker.
    131    //
    132    RBBINode *cn = new RBBINode(RBBINode::opCat, *fStatus);
    133    // Exit if memory allocation failed.
    134    if (cn == nullptr) {
    135        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    136    }
    137    if (U_FAILURE(*fStatus)) {
    138        delete cn;
    139        return;
    140    }
    141    cn->fLeftChild = fTree;
    142    fTree->fParent = cn;
    143    RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark, *fStatus);
    144    // Delete and exit if memory allocation failed.
    145    if (cn->fRightChild == nullptr) {
    146        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    147    }
    148    if (U_FAILURE(*fStatus)) {
    149        delete cn;
    150        return;
    151    }
    152    cn->fRightChild->fParent = cn;
    153    fTree = cn;
    154 
    155    //
    156    //  Replace all references to UnicodeSets with the tree for the equivalent
    157    //      expression.
    158    //
    159    fTree->flattenSets(*fStatus, 0);
    160 #ifdef RBBI_DEBUG
    161    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
    162        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
    163        RBBINode::printTree(fTree, true);
    164    }
    165 #endif
    166 
    167 
    168    //
    169    // calculate the functions nullable, firstpos, lastpos and followpos on
    170    // nodes in the parse tree.
    171    //    See the algorithm description in Aho.
    172    //    Understanding how this works by looking at the code alone will be
    173    //       nearly impossible.
    174    //
    175    calcNullable(fTree);
    176    calcFirstPos(fTree);
    177    calcLastPos(fTree);
    178    calcFollowPos(fTree);
    179    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
    180        RBBIDebugPuts("\n");
    181        printPosSets(fTree);
    182    }
    183 
    184    //
    185    //  For "chained" rules, modify the followPos sets
    186    //
    187    if (fRB->fChainRules) {
    188        calcChainedFollowPos(fTree, endMarkerNode);
    189    }
    190 
    191    //
    192    //  BOF (start of input) test fixup.
    193    //
    194    if (fRB->fSetBuilder->sawBOF()) {
    195        bofFixup();
    196    }
    197 
    198    //
    199    // Build the DFA state transition tables.
    200    //
    201    buildStateTable();
    202    mapLookAheadRules();
    203    flagAcceptingStates();
    204    flagLookAheadStates();
    205    flagTaggedStates();
    206 
    207    //
    208    // Update the global table of rule status {tag} values
    209    // The rule builder has a global vector of status values that are common
    210    //    for all tables.  Merge the ones from this table into the global set.
    211    //
    212    mergeRuleStatusVals();
    213 }
    214 
    215 
    216 
    217 //-----------------------------------------------------------------------------
    218 //
    219 //   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
    220 //
    221 //-----------------------------------------------------------------------------
    222 void RBBITableBuilder::calcNullable(RBBINode *n) {
    223    if (n == nullptr) {
    224        return;
    225    }
    226    if (n->fType == RBBINode::setRef ||
    227        n->fType == RBBINode::endMark ) {
    228        // These are non-empty leaf node types.
    229        n->fNullable = false;
    230        return;
    231    }
    232 
    233    if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) {
    234        // Lookahead marker node.  It's a leaf, so no recursion on children.
    235        // It's nullable because it does not match any literal text from the input stream.
    236        n->fNullable = true;
    237        return;
    238    }
    239 
    240 
    241    // The node is not a leaf.
    242    //  Calculate nullable on its children.
    243    calcNullable(n->fLeftChild);
    244    calcNullable(n->fRightChild);
    245 
    246    // Apply functions from table 3.40 in Aho
    247    if (n->fType == RBBINode::opOr) {
    248        n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
    249    }
    250    else if (n->fType == RBBINode::opCat) {
    251        n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
    252    }
    253    else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) {
    254        n->fNullable = true;
    255    }
    256    else {
    257        n->fNullable = false;
    258    }
    259 }
    260 
    261 
    262 
    263 
    264 //-----------------------------------------------------------------------------
    265 //
    266 //   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
    267 //
    268 //-----------------------------------------------------------------------------
    269 void RBBITableBuilder::calcFirstPos(RBBINode *n) {
    270    if (n == nullptr) {
    271        return;
    272    }
    273    if (n->fType == RBBINode::leafChar  ||
    274        n->fType == RBBINode::endMark   ||
    275        n->fType == RBBINode::lookAhead ||
    276        n->fType == RBBINode::tag) {
    277        // These are non-empty leaf node types.
    278        // Note: In order to maintain the sort invariant on the set,
    279        // this function should only be called on a node whose set is
    280        // empty to start with.
    281        n->fFirstPosSet->addElement(n, *fStatus);
    282        return;
    283    }
    284 
    285    // The node is not a leaf.
    286    //  Calculate firstPos on its children.
    287    calcFirstPos(n->fLeftChild);
    288    calcFirstPos(n->fRightChild);
    289 
    290    // Apply functions from table 3.40 in Aho
    291    if (n->fType == RBBINode::opOr) {
    292        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
    293        setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
    294    }
    295    else if (n->fType == RBBINode::opCat) {
    296        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
    297        if (n->fLeftChild->fNullable) {
    298            setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
    299        }
    300    }
    301    else if (n->fType == RBBINode::opStar ||
    302             n->fType == RBBINode::opQuestion ||
    303             n->fType == RBBINode::opPlus) {
    304        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
    305    }
    306 }
    307 
    308 
    309 
    310 //-----------------------------------------------------------------------------
    311 //
    312 //   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
    313 //
    314 //-----------------------------------------------------------------------------
    315 void RBBITableBuilder::calcLastPos(RBBINode *n) {
    316    if (n == nullptr) {
    317        return;
    318    }
    319    if (n->fType == RBBINode::leafChar  ||
    320        n->fType == RBBINode::endMark   ||
    321        n->fType == RBBINode::lookAhead ||
    322        n->fType == RBBINode::tag) {
    323        // These are non-empty leaf node types.
    324        // Note: In order to maintain the sort invariant on the set,
    325        // this function should only be called on a node whose set is
    326        // empty to start with.
    327        n->fLastPosSet->addElement(n, *fStatus);
    328        return;
    329    }
    330 
    331    // The node is not a leaf.
    332    //  Calculate lastPos on its children.
    333    calcLastPos(n->fLeftChild);
    334    calcLastPos(n->fRightChild);
    335 
    336    // Apply functions from table 3.40 in Aho
    337    if (n->fType == RBBINode::opOr) {
    338        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
    339        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
    340    }
    341    else if (n->fType == RBBINode::opCat) {
    342        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
    343        if (n->fRightChild->fNullable) {
    344            setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
    345        }
    346    }
    347    else if (n->fType == RBBINode::opStar     ||
    348             n->fType == RBBINode::opQuestion ||
    349             n->fType == RBBINode::opPlus) {
    350        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
    351    }
    352 }
    353 
    354 
    355 
    356 //-----------------------------------------------------------------------------
    357 //
    358 //   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
    359 //
    360 //-----------------------------------------------------------------------------
    361 void RBBITableBuilder::calcFollowPos(RBBINode *n) {
    362    if (n == nullptr ||
    363        n->fType == RBBINode::leafChar ||
    364        n->fType == RBBINode::endMark) {
    365        return;
    366    }
    367 
    368    calcFollowPos(n->fLeftChild);
    369    calcFollowPos(n->fRightChild);
    370 
    371    // Aho rule #1
    372    if (n->fType == RBBINode::opCat) {
    373        RBBINode *i;   // is 'i' in Aho's description
    374        uint32_t     ix;
    375 
    376        UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;
    377 
    378        for (ix = 0; ix < static_cast<uint32_t>(LastPosOfLeftChild->size()); ix++) {
    379            i = static_cast<RBBINode*>(LastPosOfLeftChild->elementAt(ix));
    380            setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);
    381        }
    382    }
    383 
    384    // Aho rule #2
    385    if (n->fType == RBBINode::opStar ||
    386        n->fType == RBBINode::opPlus) {
    387        RBBINode   *i;  // again, n and i are the names from Aho's description.
    388        uint32_t    ix;
    389 
    390        for (ix = 0; ix < static_cast<uint32_t>(n->fLastPosSet->size()); ix++) {
    391            i = static_cast<RBBINode*>(n->fLastPosSet->elementAt(ix));
    392            setAdd(i->fFollowPos, n->fFirstPosSet);
    393        }
    394    }
    395 
    396 
    397 
    398 }
    399 
    400 //-----------------------------------------------------------------------------
    401 //
    402 //    addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
    403 //                        as roots of a rule to a destination vector.
    404 //
    405 //-----------------------------------------------------------------------------
    406 void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
    407    if (node == nullptr || U_FAILURE(*fStatus)) {
    408        return;
    409    }
    410    U_ASSERT(!dest->hasDeleter());
    411    if (node->fRuleRoot) {
    412        dest->addElement(node, *fStatus);
    413        // Note: rules cannot nest. If we found a rule start node,
    414        //       no child node can also be a start node.
    415        return;
    416    }
    417    addRuleRootNodes(dest, node->fLeftChild);
    418    addRuleRootNodes(dest, node->fRightChild);
    419 }
    420 
    421 //-----------------------------------------------------------------------------
    422 //
    423 //   calcChainedFollowPos.    Modify the previously calculated followPos sets
    424 //                            to implement rule chaining.  NOT described by Aho
    425 //
    426 //-----------------------------------------------------------------------------
    427 void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
    428 
    429    UVector         leafNodes(*fStatus);
    430    if (U_FAILURE(*fStatus)) {
    431        return;
    432    }
    433 
    434    // get a list all leaf nodes
    435    tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
    436    if (U_FAILURE(*fStatus)) {
    437        return;
    438    }
    439 
    440    // Collect all leaf nodes that can start matches for rules
    441    // with inbound chaining enabled, which is the union of the 
    442    // firstPosition sets from each of the rule root nodes.
    443    
    444    UVector ruleRootNodes(*fStatus);
    445    addRuleRootNodes(&ruleRootNodes, tree);
    446 
    447    UVector matchStartNodes(*fStatus);
    448    for (int j=0; j<ruleRootNodes.size(); ++j) {
    449        RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
    450        if (node->fChainIn) {
    451            setAdd(&matchStartNodes, node->fFirstPosSet);
    452        }
    453    }
    454    if (U_FAILURE(*fStatus)) {
    455        return;
    456    }
    457 
    458    int32_t  endNodeIx;
    459    int32_t  startNodeIx;
    460 
    461    for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) {
    462        RBBINode* endNode = static_cast<RBBINode*>(leafNodes.elementAt(endNodeIx));
    463 
    464        // Identify leaf nodes that correspond to overall rule match positions.
    465        // These include the endMarkNode in their followPos sets.
    466        //
    467        // Note: do not consider other end marker nodes, those that are added to
    468        //       look-ahead rules. These can't chain; a match immediately stops
    469        //       further matching. This leaves exactly one end marker node, the one
    470        //       at the end of the complete tree.
    471 
    472        if (!endNode->fFollowPos->contains(endMarkNode)) {
    473            continue;
    474        }
    475 
    476        // We've got a node that can end a match.
    477 
    478        // Now iterate over the nodes that can start a match, looking for ones
    479        //   with the same char class as our ending node.
    480        RBBINode *startNode;
    481        for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
    482            startNode = static_cast<RBBINode*>(matchStartNodes.elementAt(startNodeIx));
    483            if (startNode->fType != RBBINode::leafChar) {
    484                continue;
    485            }
    486 
    487            if (endNode->fVal == startNode->fVal) {
    488                // The end val (character class) of one possible match is the
    489                //   same as the start of another.
    490 
    491                // Add all nodes from the followPos of the start node to the
    492                //  followPos set of the end node, which will have the effect of
    493                //  letting matches transition from a match state at endNode
    494                //  to the second char of a match starting with startNode.
    495                setAdd(endNode->fFollowPos, startNode->fFollowPos);
    496            }
    497        }
    498    }
    499 }
    500 
    501 
    502 //-----------------------------------------------------------------------------
    503 //
    504 //   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
    505 //                Do an swizzle similar to chaining, modifying the followPos set of
    506 //                the bofNode to include the followPos nodes from other {bot} nodes
    507 //                scattered through the tree.
    508 //
    509 //                This function has much in common with calcChainedFollowPos().
    510 //
    511 //-----------------------------------------------------------------------------
    512 void RBBITableBuilder::bofFixup() {
    513 
    514    if (U_FAILURE(*fStatus)) {
    515        return;
    516    }
    517 
    518    //   The parse tree looks like this ...
    519    //         fTree root  --->       <cat>
    520    //                               /     \       .
    521    //                            <cat>   <#end node>
    522    //                           /     \  .
    523    //                     <bofNode>   rest
    524    //                               of tree
    525    //
    526    //    We will be adding things to the followPos set of the <bofNode>
    527    //
    528    RBBINode  *bofNode = fTree->fLeftChild->fLeftChild;
    529    U_ASSERT(bofNode->fType == RBBINode::leafChar);
    530    U_ASSERT(bofNode->fVal == 2);
    531 
    532    // Get all nodes that can be the start a match of the user-written rules
    533    //  (excluding the fake bofNode)
    534    //  We want the nodes that can start a match in the
    535    //     part labeled "rest of tree"
    536    // 
    537    UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;
    538 
    539    RBBINode *startNode;
    540    int       startNodeIx;
    541    for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
    542        startNode = static_cast<RBBINode*>(matchStartNodes->elementAt(startNodeIx));
    543        if (startNode->fType != RBBINode::leafChar) {
    544            continue;
    545        }
    546 
    547        if (startNode->fVal == bofNode->fVal) {
    548            //  We found a leaf node corresponding to a {bof} that was
    549            //    explicitly written into a rule.
    550            //  Add everything from the followPos set of this node to the
    551            //    followPos set of the fake bofNode at the start of the tree.
    552            //  
    553            setAdd(bofNode->fFollowPos, startNode->fFollowPos);
    554        }
    555    }
    556 }
    557 
    558 //-----------------------------------------------------------------------------
    559 //
    560 //   buildStateTable()    Determine the set of runtime DFA states and the
    561 //                        transition tables for these states, by the algorithm
    562 //                        of fig. 3.44 in Aho.
    563 //
    564 //                        Most of the comments are quotes of Aho's psuedo-code.
    565 //
    566 //-----------------------------------------------------------------------------
    567 void RBBITableBuilder::buildStateTable() {
    568    if (U_FAILURE(*fStatus)) {
    569        return;
    570    }
    571    RBBIStateDescriptor *failState;
    572    // Set it to nullptr to avoid uninitialized warning
    573    RBBIStateDescriptor *initialState = nullptr;
    574    //
    575    // Add a dummy state 0 - the stop state.  Not from Aho.
    576    int      lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
    577    failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
    578    if (failState == nullptr) {
    579        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    580        goto ExitBuildSTdeleteall;
    581    }
    582    failState->fPositions = new UVector(*fStatus);
    583    if (failState->fPositions == nullptr) {
    584        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    585    }
    586    if (failState->fPositions == nullptr || U_FAILURE(*fStatus)) {
    587        goto ExitBuildSTdeleteall;
    588    }
    589    fDStates->addElement(failState, *fStatus);
    590    if (U_FAILURE(*fStatus)) {
    591        goto ExitBuildSTdeleteall;
    592    }
    593 
    594    // initially, the only unmarked state in Dstates is firstpos(root),
    595    //       where toot is the root of the syntax tree for (r)#;
    596    initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
    597    if (initialState == nullptr) {
    598        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    599    }
    600    if (U_FAILURE(*fStatus)) {
    601        goto ExitBuildSTdeleteall;
    602    }
    603    initialState->fPositions = new UVector(*fStatus);
    604    if (initialState->fPositions == nullptr) {
    605        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    606    }
    607    if (U_FAILURE(*fStatus)) {
    608        goto ExitBuildSTdeleteall;
    609    }
    610    setAdd(initialState->fPositions, fTree->fFirstPosSet);
    611    fDStates->addElement(initialState, *fStatus);
    612    if (U_FAILURE(*fStatus)) {
    613        goto ExitBuildSTdeleteall;
    614    }
    615 
    616    // while there is an unmarked state T in Dstates do begin
    617    for (;;) {
    618        RBBIStateDescriptor *T = nullptr;
    619        int32_t              tx;
    620        for (tx=1; tx<fDStates->size(); tx++) {
    621            RBBIStateDescriptor *temp;
    622            temp = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(tx));
    623            if (temp->fMarked == false) {
    624                T = temp;
    625                break;
    626            }
    627        }
    628        if (T == nullptr) {
    629            break;
    630        }
    631 
    632        // mark T;
    633        T->fMarked = true;
    634 
    635        // for each input symbol a do begin
    636        int32_t  a;
    637        for (a = 1; a<=lastInputSymbol; a++) {
    638            // let U be the set of positions that are in followpos(p)
    639            //    for some position p in T
    640            //    such that the symbol at position p is a;
    641            UVector    *U = nullptr;
    642            RBBINode   *p;
    643            int32_t     px;
    644            for (px=0; px<T->fPositions->size(); px++) {
    645                p = static_cast<RBBINode*>(T->fPositions->elementAt(px));
    646                if ((p->fType == RBBINode::leafChar) &&  (p->fVal == a)) {
    647                    if (U == nullptr) {
    648                        U = new UVector(*fStatus);
    649                        if (U == nullptr) {
    650                        	*fStatus = U_MEMORY_ALLOCATION_ERROR;
    651                        	goto ExitBuildSTdeleteall;
    652                        }
    653                    }
    654                    setAdd(U, p->fFollowPos);
    655                }
    656            }
    657 
    658            // if U is not empty and not in DStates then
    659            int32_t  ux = 0;
    660            UBool    UinDstates = false;
    661            if (U != nullptr) {
    662                U_ASSERT(U->size() > 0);
    663                int  ix;
    664                for (ix=0; ix<fDStates->size(); ix++) {
    665                    RBBIStateDescriptor *temp2;
    666                    temp2 = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(ix));
    667                    if (setEquals(U, temp2->fPositions)) {
    668                        delete U;
    669                        U  = temp2->fPositions;
    670                        ux = ix;
    671                        UinDstates = true;
    672                        break;
    673                    }
    674                }
    675 
    676                // Add U as an unmarked state to Dstates
    677                if (!UinDstates)
    678                {
    679                    RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
    680                    if (newState == nullptr) {
    681                    	*fStatus = U_MEMORY_ALLOCATION_ERROR;
    682                    }
    683                    if (U_FAILURE(*fStatus)) {
    684                        goto ExitBuildSTdeleteall;
    685                    }
    686                    newState->fPositions = U;
    687                    fDStates->addElement(newState, *fStatus);
    688                    if (U_FAILURE(*fStatus)) {
    689                        return;
    690                    }
    691                    ux = fDStates->size()-1;
    692                }
    693 
    694                // Dtran[T, a] := U;
    695                T->fDtran->setElementAt(ux, a);
    696            }
    697        }
    698    }
    699    return;
    700    // delete local pointers only if error occurred.
    701 ExitBuildSTdeleteall:
    702    delete initialState;
    703    delete failState;
    704 }
    705 
    706 
    707 /**
    708 * mapLookAheadRules
    709 *
    710 */
    711 void RBBITableBuilder::mapLookAheadRules() {
    712    fLookAheadRuleMap =  new UVector32(fRB->fScanner->numRules() + 1, *fStatus);
    713    if (fLookAheadRuleMap == nullptr) {
    714        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    715    }
    716    if (U_FAILURE(*fStatus)) {
    717        return;
    718    }
    719    fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
    720 
    721    for (int32_t n=0; n<fDStates->size(); n++) {
    722        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
    723        int32_t laSlotForState = 0;
    724 
    725        // Establish the look-ahead slot for this state, if the state covers
    726        // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
    727 
    728        // If any of the look-ahead nodes already have a slot assigned, use it,
    729        // otherwise assign a new one.
    730 
    731        bool sawLookAheadNode = false;
    732        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
    733            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
    734            if (node->fType != RBBINode::NodeType::lookAhead) {
    735                continue;
    736            }
    737            sawLookAheadNode = true;
    738            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
    739            U_ASSERT(ruleNum < fLookAheadRuleMap->size());
    740            U_ASSERT(ruleNum > 0);
    741            int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);
    742            if (laSlot != 0) {
    743                if (laSlotForState == 0) {
    744                    laSlotForState = laSlot;
    745                } else {
    746                    // TODO: figure out if this can fail, change to setting an error code if so.
    747                    U_ASSERT(laSlot == laSlotForState);
    748                }
    749            }
    750        }
    751        if (!sawLookAheadNode) {
    752            continue;
    753        }
    754 
    755        if (laSlotForState == 0) {
    756            laSlotForState = ++fLASlotsInUse;
    757        }
    758 
    759        // For each look ahead node covered by this state,
    760        // set the mapping from the node's rule number to the look ahead slot.
    761        // There can be multiple nodes/rule numbers going to the same la slot.
    762 
    763        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
    764            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
    765            if (node->fType != RBBINode::NodeType::lookAhead) {
    766                continue;
    767            }
    768            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
    769            int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);
    770            (void)existingVal;
    771            U_ASSERT(existingVal == 0 || existingVal == laSlotForState);
    772            fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);
    773        }
    774    }
    775 
    776 }
    777 
    778 //-----------------------------------------------------------------------------
    779 //
    780 //   flagAcceptingStates    Identify accepting states.
    781 //                          First get a list of all of the end marker nodes.
    782 //                          Then, for each state s,
    783 //                              if s contains one of the end marker nodes in its list of tree positions then
    784 //                                  s is an accepting state.
    785 //
    786 //-----------------------------------------------------------------------------
    787 void     RBBITableBuilder::flagAcceptingStates() {
    788    if (U_FAILURE(*fStatus)) {
    789        return;
    790    }
    791    UVector     endMarkerNodes(*fStatus);
    792    RBBINode    *endMarker;
    793    int32_t     i;
    794    int32_t     n;
    795 
    796    if (U_FAILURE(*fStatus)) {
    797        return;
    798    }
    799 
    800    fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
    801    if (U_FAILURE(*fStatus)) {
    802        return;
    803    }
    804 
    805    for (i=0; i<endMarkerNodes.size(); i++) {
    806        endMarker = static_cast<RBBINode*>(endMarkerNodes.elementAt(i));
    807        for (n=0; n<fDStates->size(); n++) {
    808            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
    809            if (sd->fPositions->indexOf(endMarker) >= 0) {
    810                // Any non-zero value for fAccepting means this is an accepting node.
    811                // The value is what will be returned to the user as the break status.
    812                // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
    813 
    814                if (sd->fAccepting==0) {
    815                    // State hasn't been marked as accepting yet.  Do it now.
    816                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
    817                    if (sd->fAccepting == 0) {
    818                        sd->fAccepting = ACCEPTING_UNCONDITIONAL;
    819                    }
    820                }
    821                if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
    822                    // Both lookahead and non-lookahead accepting for this state.
    823                    // Favor the look-ahead, because a look-ahead match needs to
    824                    // immediately stop the run-time engine. First match, not longest.
    825                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
    826                }
    827                // implicit else:
    828                // if sd->fAccepting already had a value other than 0 or 1, leave it be.
    829            }
    830        }
    831    }
    832 }
    833 
    834 
    835 //-----------------------------------------------------------------------------
    836 //
    837 //    flagLookAheadStates   Very similar to flagAcceptingStates, above.
    838 //
    839 //-----------------------------------------------------------------------------
    840 void     RBBITableBuilder::flagLookAheadStates() {
    841    if (U_FAILURE(*fStatus)) {
    842        return;
    843    }
    844    UVector     lookAheadNodes(*fStatus);
    845    RBBINode    *lookAheadNode;
    846    int32_t     i;
    847    int32_t     n;
    848 
    849    fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
    850    if (U_FAILURE(*fStatus)) {
    851        return;
    852    }
    853    for (i=0; i<lookAheadNodes.size(); i++) {
    854        lookAheadNode = static_cast<RBBINode*>(lookAheadNodes.elementAt(i));
    855        U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
    856 
    857        for (n=0; n<fDStates->size(); n++) {
    858            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
    859            int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
    860            if (positionsIdx >= 0) {
    861                U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
    862                uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
    863                U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
    864                // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
    865                //     printf("%s:%d Bingo. sd->fLookAhead:%d   lookaheadSlot:%d\n",
    866                //            __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);
    867                // }
    868                sd->fLookAhead = lookaheadSlot;
    869            }
    870        }
    871    }
    872 }
    873 
    874 
    875 
    876 
    877 //-----------------------------------------------------------------------------
    878 //
    879 //    flagTaggedStates
    880 //
    881 //-----------------------------------------------------------------------------
    882 void     RBBITableBuilder::flagTaggedStates() {
    883    if (U_FAILURE(*fStatus)) {
    884        return;
    885    }
    886    UVector     tagNodes(*fStatus);
    887    RBBINode    *tagNode;
    888    int32_t     i;
    889    int32_t     n;
    890 
    891    if (U_FAILURE(*fStatus)) {
    892        return;
    893    }
    894    fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
    895    if (U_FAILURE(*fStatus)) {
    896        return;
    897    }
    898    for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
    899        tagNode = static_cast<RBBINode*>(tagNodes.elementAt(i));
    900 
    901        for (n=0; n<fDStates->size(); n++) {              //    For each state  s (row in the state table)
    902            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
    903            if (sd->fPositions->indexOf(tagNode) >= 0) {  //       if  s include the tag node t
    904                sortedAdd(&sd->fTagVals, tagNode->fVal);
    905            }
    906        }
    907    }
    908 }
    909 
    910 
    911 
    912 
    913 //-----------------------------------------------------------------------------
    914 //
    915 //  mergeRuleStatusVals
    916 //
    917 //      Update the global table of rule status {tag} values
    918 //      The rule builder has a global vector of status values that are common
    919 //      for all tables.  Merge the ones from this table into the global set.
    920 //
    921 //-----------------------------------------------------------------------------
    922 void  RBBITableBuilder::mergeRuleStatusVals() {
    923    //
    924    //  The basic outline of what happens here is this...
    925    //
    926    //    for each state in this state table
    927    //       if the status tag list for this state is in the global statuses list
    928    //           record where and
    929    //           continue with the next state
    930    //       else
    931    //           add the tag list for this state to the global list.
    932    //
    933    int i;
    934    int n;
    935 
    936    // Pre-set a single tag of {0} into the table.
    937    //   We will need this as a default, for rule sets with no explicit tagging.
    938    if (fRB->fRuleStatusVals->size() == 0) {
    939        fRB->fRuleStatusVals->addElement(1, *fStatus);  // Num of statuses in group
    940        fRB->fRuleStatusVals->addElement(static_cast<int32_t>(0), *fStatus); // and our single status of zero
    941    }
    942 
    943    //    For each state
    944    for (n=0; n<fDStates->size(); n++) {
    945        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
    946        UVector *thisStatesTagValues = sd->fTagVals;
    947        if (thisStatesTagValues == nullptr) {
    948            // No tag values are explicitly associated with this state.
    949            //   Set the default tag value.
    950            sd->fTagsIdx = 0;
    951            continue;
    952        }
    953 
    954        // There are tag(s) associated with this state.
    955        //   fTagsIdx will be the index into the global tag list for this state's tag values.
    956        //   Initial value of -1 flags that we haven't got it set yet.
    957        sd->fTagsIdx = -1;
    958        int32_t  thisTagGroupStart = 0;   // indexes into the global rule status vals list
    959        int32_t  nextTagGroupStart = 0;
    960 
    961        // Loop runs once per group of tags in the global list
    962        while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
    963            thisTagGroupStart = nextTagGroupStart;
    964            nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
    965            if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
    966                // The number of tags for this state is different from
    967                //    the number of tags in this group from the global list.
    968                //    Continue with the next group from the global list.
    969                continue;
    970            }
    971            // The lengths match, go ahead and compare the actual tag values
    972            //    between this state and the group from the global list.
    973            for (i=0; i<thisStatesTagValues->size(); i++) {
    974                if (thisStatesTagValues->elementAti(i) !=
    975                    fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
    976                    // Mismatch.
    977                    break;
    978                }
    979            }
    980 
    981            if (i == thisStatesTagValues->size()) {
    982                // We found a set of tag values in the global list that match
    983                //   those for this state.  Use them.
    984                sd->fTagsIdx = thisTagGroupStart;
    985                break;
    986            }
    987        }
    988 
    989        if (sd->fTagsIdx == -1) {
    990            // No suitable entry in the global tag list already.  Add one
    991            sd->fTagsIdx = fRB->fRuleStatusVals->size();
    992            fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
    993            for (i=0; i<thisStatesTagValues->size(); i++) {
    994                fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
    995            }
    996        }
    997    }
    998 }
    999 
   1000 
   1001 
   1002 
   1003 
   1004 
   1005 
   1006 //-----------------------------------------------------------------------------
   1007 //
   1008 //  sortedAdd  Add a value to a vector of sorted values (ints).
   1009 //             Do not replicate entries; if the value is already there, do not
   1010 //                add a second one.
   1011 //             Lazily create the vector if it does not already exist.
   1012 //
   1013 //-----------------------------------------------------------------------------
   1014 void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
   1015    int32_t i;
   1016 
   1017    if (*vector == nullptr) {
   1018        *vector = new UVector(*fStatus);
   1019    }
   1020    if (*vector == nullptr || U_FAILURE(*fStatus)) {
   1021        return;
   1022    }
   1023    UVector *vec = *vector;
   1024    int32_t  vSize = vec->size();
   1025    for (i=0; i<vSize; i++) {
   1026        int32_t valAtI = vec->elementAti(i);
   1027        if (valAtI == val) {
   1028            // The value is already in the vector.  Don't add it again.
   1029            return;
   1030        }
   1031        if (valAtI > val) {
   1032            break;
   1033        }
   1034    }
   1035    vec->insertElementAt(val, i, *fStatus);
   1036 }
   1037 
   1038 
   1039 
   1040 //-----------------------------------------------------------------------------
   1041 //
   1042 //  setAdd     Set operation on UVector
   1043 //             dest = dest union source
   1044 //             Elements may only appear once and must be sorted.
   1045 //
   1046 //-----------------------------------------------------------------------------
   1047 void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
   1048    U_ASSERT(!dest->hasDeleter());
   1049    U_ASSERT(!source->hasDeleter());
   1050    int32_t destOriginalSize = dest->size();
   1051    int32_t sourceSize       = source->size();
   1052    int32_t di           = 0;
   1053    MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc
   1054    void **destPtr, **sourcePtr;
   1055    void **destLim, **sourceLim;
   1056 
   1057    if (destOriginalSize > destArray.getCapacity()) {
   1058        if (destArray.resize(destOriginalSize) == nullptr) {
   1059            return;
   1060        }
   1061    }
   1062    destPtr = destArray.getAlias();
   1063    destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?
   1064 
   1065    if (sourceSize > sourceArray.getCapacity()) {
   1066        if (sourceArray.resize(sourceSize) == nullptr) {
   1067            return;
   1068        }
   1069    }
   1070    sourcePtr = sourceArray.getAlias();
   1071    sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?
   1072 
   1073    // Avoid multiple "get element" calls by getting the contents into arrays
   1074    (void) dest->toArray(destPtr);
   1075    (void) source->toArray(sourcePtr);
   1076 
   1077    dest->setSize(sourceSize+destOriginalSize, *fStatus);
   1078    if (U_FAILURE(*fStatus)) {
   1079        return;
   1080    }
   1081 
   1082    while (sourcePtr < sourceLim && destPtr < destLim) {
   1083        if (*destPtr == *sourcePtr) {
   1084            dest->setElementAt(*sourcePtr++, di++);
   1085            destPtr++;
   1086        }
   1087        // This check is required for machines with segmented memory, like i5/OS.
   1088        // Direct pointer comparison is not recommended.
   1089        else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
   1090            dest->setElementAt(*destPtr++, di++);
   1091        }
   1092        else { /* *sourcePtr < *destPtr */
   1093            dest->setElementAt(*sourcePtr++, di++);
   1094        }
   1095    }
   1096 
   1097    // At most one of these two cleanup loops will execute
   1098    while (destPtr < destLim) {
   1099        dest->setElementAt(*destPtr++, di++);
   1100    }
   1101    while (sourcePtr < sourceLim) {
   1102        dest->setElementAt(*sourcePtr++, di++);
   1103    }
   1104 
   1105    dest->setSize(di, *fStatus);
   1106 }
   1107 
   1108 
   1109 
   1110 //-----------------------------------------------------------------------------
   1111 //
   1112 //  setEqual    Set operation on UVector.
   1113 //              Compare for equality.
   1114 //              Elements must be sorted.
   1115 //
   1116 //-----------------------------------------------------------------------------
   1117 UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
   1118    return a->equals(*b);
   1119 }
   1120 
   1121 
   1122 //-----------------------------------------------------------------------------
   1123 //
   1124 //  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
   1125 //                 for each node in the tree.
   1126 //
   1127 //-----------------------------------------------------------------------------
   1128 #ifdef RBBI_DEBUG
   1129 void RBBITableBuilder::printPosSets(RBBINode *n) {
   1130    if (n==nullptr) {
   1131        return;
   1132    }
   1133    printf("\n");
   1134    RBBINode::printNodeHeader();
   1135    RBBINode::printNode(n);
   1136    RBBIDebugPrintf("         Nullable:  %s\n", n->fNullable?"true":"false");
   1137 
   1138    RBBIDebugPrintf("         firstpos:  ");
   1139    printSet(n->fFirstPosSet);
   1140 
   1141    RBBIDebugPrintf("         lastpos:   ");
   1142    printSet(n->fLastPosSet);
   1143 
   1144    RBBIDebugPrintf("         followpos: ");
   1145    printSet(n->fFollowPos);
   1146 
   1147    printPosSets(n->fLeftChild);
   1148    printPosSets(n->fRightChild);
   1149 }
   1150 #endif
   1151 
   1152 //
   1153 //    findDuplCharClassFrom()
   1154 //
   1155 bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
   1156    int32_t numStates = fDStates->size();
   1157    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
   1158 
   1159    for (; categories->first < numCols-1; categories->first++) {
   1160        // Note: dictionary & non-dictionary columns cannot be merged.
   1161        //       The limitSecond value prevents considering mixed pairs.
   1162        //       Dictionary categories are >= DictCategoriesStart.
   1163        //       Non dict categories are   <  DictCategoriesStart.
   1164        int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ?
   1165            fRB->fSetBuilder->getDictCategoriesStart() : numCols;
   1166        for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) {
   1167            // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
   1168            uint16_t table_base = 0;
   1169            uint16_t table_dupl = 1;
   1170            for (int32_t state=0; state<numStates; state++) {
   1171                RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
   1172                table_base = static_cast<uint16_t>(sd->fDtran->elementAti(categories->first));
   1173                table_dupl = static_cast<uint16_t>(sd->fDtran->elementAti(categories->second));
   1174                if (table_base != table_dupl) {
   1175                    break;
   1176                }
   1177            }
   1178            if (table_base == table_dupl) {
   1179                return true;
   1180            }
   1181        }
   1182    }
   1183    return false;
   1184 }
   1185 
   1186 
   1187 //
   1188 //    removeColumn()
   1189 //
   1190 void RBBITableBuilder::removeColumn(int32_t column) {
   1191    int32_t numStates = fDStates->size();
   1192    for (int32_t state=0; state<numStates; state++) {
   1193        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
   1194        U_ASSERT(column < sd->fDtran->size());
   1195        sd->fDtran->removeElementAt(column);
   1196    }
   1197 }
   1198 
   1199 /*
   1200 * findDuplicateState
   1201 */
   1202 bool RBBITableBuilder::findDuplicateState(IntPair *states) {
   1203    int32_t numStates = fDStates->size();
   1204    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
   1205 
   1206    for (; states->first<numStates-1; states->first++) {
   1207        RBBIStateDescriptor* firstSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->first));
   1208        for (states->second=states->first+1; states->second<numStates; states->second++) {
   1209            RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->second));
   1210            if (firstSD->fAccepting != duplSD->fAccepting ||
   1211                firstSD->fLookAhead != duplSD->fLookAhead ||
   1212                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
   1213                continue;
   1214            }
   1215            bool rowsMatch = true;
   1216            for (int32_t col=0; col < numCols; ++col) {
   1217                int32_t firstVal = firstSD->fDtran->elementAti(col);
   1218                int32_t duplVal = duplSD->fDtran->elementAti(col);
   1219                if (!((firstVal == duplVal) ||
   1220                        ((firstVal == states->first || firstVal == states->second) &&
   1221                        (duplVal  == states->first || duplVal  == states->second)))) {
   1222                    rowsMatch = false;
   1223                    break;
   1224                }
   1225            }
   1226            if (rowsMatch) {
   1227                return true;
   1228            }
   1229        }
   1230    }
   1231    return false;
   1232 }
   1233 
   1234 
   1235 bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
   1236    int32_t numStates = fSafeTable->size();
   1237 
   1238    for (; states->first<numStates-1; states->first++) {
   1239        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
   1240        for (states->second=states->first+1; states->second<numStates; states->second++) {
   1241            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
   1242            bool rowsMatch = true;
   1243            int32_t numCols = firstRow->length();
   1244            for (int32_t col=0; col < numCols; ++col) {
   1245                int32_t firstVal = firstRow->charAt(col);
   1246                int32_t duplVal = duplRow->charAt(col);
   1247                if (!((firstVal == duplVal) ||
   1248                        ((firstVal == states->first || firstVal == states->second) &&
   1249                        (duplVal  == states->first || duplVal  == states->second)))) {
   1250                    rowsMatch = false;
   1251                    break;
   1252                }
   1253            }
   1254            if (rowsMatch) {
   1255                return true;
   1256            }
   1257        }
   1258    }
   1259    return false;
   1260 }
   1261 
   1262 
   1263 void RBBITableBuilder::removeState(IntPair duplStates) {
   1264    const int32_t keepState = duplStates.first;
   1265    const int32_t duplState = duplStates.second;
   1266    U_ASSERT(keepState < duplState);
   1267    U_ASSERT(duplState < fDStates->size());
   1268 
   1269    RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(duplState));
   1270    fDStates->removeElementAt(duplState);
   1271    delete duplSD;
   1272 
   1273    int32_t numStates = fDStates->size();
   1274    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
   1275    for (int32_t state=0; state<numStates; ++state) {
   1276        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
   1277        for (int32_t col=0; col<numCols; col++) {
   1278            int32_t existingVal = sd->fDtran->elementAti(col);
   1279            int32_t newVal = existingVal;
   1280            if (existingVal == duplState) {
   1281                newVal = keepState;
   1282            } else if (existingVal > duplState) {
   1283                newVal = existingVal - 1;
   1284            }
   1285            sd->fDtran->setElementAt(newVal, col);
   1286        }
   1287    }
   1288 }
   1289 
   1290 void RBBITableBuilder::removeSafeState(IntPair duplStates) {
   1291    const int32_t keepState = duplStates.first;
   1292    const int32_t duplState = duplStates.second;
   1293    U_ASSERT(keepState < duplState);
   1294    U_ASSERT(duplState < fSafeTable->size());
   1295 
   1296    fSafeTable->removeElementAt(duplState);   // Note that fSafeTable has a deleter function
   1297                                              // and will auto-delete the removed element.
   1298    int32_t numStates = fSafeTable->size();
   1299    for (int32_t state=0; state<numStates; ++state) {
   1300        UnicodeString* sd = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
   1301        int32_t numCols = sd->length();
   1302        for (int32_t col=0; col<numCols; col++) {
   1303            int32_t existingVal = sd->charAt(col);
   1304            int32_t newVal = existingVal;
   1305            if (existingVal == duplState) {
   1306                newVal = keepState;
   1307            } else if (existingVal > duplState) {
   1308                newVal = existingVal - 1;
   1309            }
   1310            sd->setCharAt(col, static_cast<char16_t>(newVal));
   1311        }
   1312    }
   1313 }
   1314 
   1315 
   1316 /*
   1317 * RemoveDuplicateStates
   1318 */
   1319 int32_t RBBITableBuilder::removeDuplicateStates() {
   1320    IntPair dupls = {3, 0};
   1321    int32_t numStatesRemoved = 0;
   1322 
   1323    while (findDuplicateState(&dupls)) {
   1324        // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
   1325        removeState(dupls);
   1326        ++numStatesRemoved;
   1327    }
   1328    return numStatesRemoved;
   1329 }
   1330 
   1331 
   1332 //-----------------------------------------------------------------------------
   1333 //
   1334 //   getTableSize()    Calculate the size of the runtime form of this
   1335 //                     state transition table.
   1336 //
   1337 //-----------------------------------------------------------------------------
   1338 int32_t  RBBITableBuilder::getTableSize() const {
   1339    int32_t    size = 0;
   1340    int32_t    numRows;
   1341    int32_t    numCols;
   1342    int32_t    rowSize;
   1343 
   1344    if (fTree == nullptr) {
   1345        return 0;
   1346    }
   1347 
   1348    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
   1349 
   1350    numRows = fDStates->size();
   1351    numCols = fRB->fSetBuilder->getNumCharCategories();
   1352 
   1353    if (use8BitsForTable()) {
   1354        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
   1355    } else {
   1356        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
   1357    }
   1358    size   += numRows * rowSize;
   1359    return size;
   1360 }
   1361 
   1362 bool RBBITableBuilder::use8BitsForTable() const {
   1363    return fDStates->size() <= kMaxStateFor8BitsTable;
   1364 }
   1365 
   1366 //-----------------------------------------------------------------------------
   1367 //
   1368 //   exportTable()    export the state transition table in the format required
   1369 //                    by the runtime engine.  getTableSize() bytes of memory
   1370 //                    must be available at the output address "where".
   1371 //
   1372 //-----------------------------------------------------------------------------
   1373 void RBBITableBuilder::exportTable(void *where) {
   1374    RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
   1375    uint32_t           state;
   1376    int                col;
   1377 
   1378    if (U_FAILURE(*fStatus) || fTree == nullptr) {
   1379        return;
   1380    }
   1381 
   1382    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
   1383    if (catCount > 0x7fff ||
   1384        fDStates->size() > 0x7fff) {
   1385        *fStatus = U_BRK_INTERNAL_ERROR;
   1386        return;
   1387    }
   1388 
   1389    table->fNumStates = fDStates->size();
   1390    table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();
   1391    table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
   1392    table->fFlags     = 0;
   1393    if (use8BitsForTable()) {
   1394        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
   1395        table->fFlags  |= RBBI_8BITS_ROWS;
   1396    } else {
   1397        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
   1398    }
   1399    if (fRB->fLookAheadHardBreak) {
   1400        table->fFlags  |= RBBI_LOOKAHEAD_HARD_BREAK;
   1401    }
   1402    if (fRB->fSetBuilder->sawBOF()) {
   1403        table->fFlags  |= RBBI_BOF_REQUIRED;
   1404    }
   1405 
   1406    for (state=0; state<table->fNumStates; state++) {
   1407        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
   1408        RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
   1409        if (use8BitsForTable()) {
   1410            U_ASSERT (sd->fAccepting <= 255);
   1411            U_ASSERT (sd->fLookAhead <= 255);
   1412            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
   1413            RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
   1414            r8->fAccepting = sd->fAccepting;
   1415            r8->fLookAhead = sd->fLookAhead;
   1416            r8->fTagsIdx   = sd->fTagsIdx;
   1417            for (col=0; col<catCount; col++) {
   1418                U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
   1419                r8->fNextState[col] = sd->fDtran->elementAti(col);
   1420            }
   1421        } else {
   1422            U_ASSERT (sd->fAccepting <= 0xffff);
   1423            U_ASSERT (sd->fLookAhead <= 0xffff);
   1424            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
   1425            row->r16.fAccepting = sd->fAccepting;
   1426            row->r16.fLookAhead = sd->fLookAhead;
   1427            row->r16.fTagsIdx   = sd->fTagsIdx;
   1428            for (col=0; col<catCount; col++) {
   1429                row->r16.fNextState[col] = sd->fDtran->elementAti(col);
   1430            }
   1431        }
   1432    }
   1433 }
   1434 
   1435 
   1436 /**
   1437 *   Synthesize a safe state table from the main state table.
   1438 */
   1439 void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
   1440    // The safe table creation has three steps:
   1441 
   1442    // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
   1443    // following the pair do not depend on context or state before the pair. To test
   1444    // whether a pair is safe, run it through the main forward state table, starting
   1445    // from each state. If the final state is the same, no matter what the starting state,
   1446    // the pair is safe.
   1447    //
   1448    // 2. Build a state table that recognizes the safe pairs. It's similar to their
   1449    // forward table, with a column for each input character [class], and a row for
   1450    // each state. Row 1 is the start state, and row 0 is the stop state. Initially
   1451    // create an additional state for each input character category; being in
   1452    // one of these states means that the character has been seen, and is potentially
   1453    // the first of a pair. In each of these rows, the entry for the second character
   1454    // of a safe pair is set to the stop state (0), indicating that a match was found.
   1455    // All other table entries are set to the state corresponding the current input
   1456    // character, allowing that character to be the of a start following pair.
   1457    //
   1458    // Because the safe rules are to be run in reverse, moving backwards in the text,
   1459    // the first and second pair categories are swapped when building the table.
   1460    //
   1461    // 3. Compress the table. There are typically many rows (states) that are
   1462    // equivalent - that have zeroes (match completed) in the same columns -
   1463    // and can be folded together.
   1464 
   1465    // Each safe pair is stored as two UChars in the safePair string.
   1466    UnicodeString safePairs;
   1467 
   1468    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
   1469    int32_t numStates = fDStates->size();
   1470 
   1471    for (int32_t c1=0; c1<numCharClasses; ++c1) {
   1472        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
   1473            int32_t wantedEndState = -1;
   1474            int32_t endState = 0;
   1475            for (int32_t startState = 1; startState < numStates; ++startState) {
   1476                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
   1477                int32_t s2 = startStateD->fDtran->elementAti(c1);
   1478                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
   1479                endState = s2StateD->fDtran->elementAti(c2);
   1480                if (wantedEndState < 0) {
   1481                    wantedEndState = endState;
   1482                } else {
   1483                    if (wantedEndState != endState) {
   1484                        break;
   1485                    }
   1486                }
   1487            }
   1488            if (wantedEndState == endState) {
   1489                safePairs.append(static_cast<char16_t>(c1));
   1490                safePairs.append(static_cast<char16_t>(c2));
   1491                // printf("(%d, %d) ", c1, c2);
   1492            }
   1493        }
   1494        // printf("\n");
   1495    }
   1496 
   1497    // Populate the initial safe table.
   1498    // The table as a whole is UVector<UnicodeString>
   1499    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
   1500    // Row 0 is the stop state.
   1501    // Row 1 is the start state.
   1502    // Row 2 and beyond are other states, initially one per char class, but
   1503    //   after initial construction, many of the states will be combined, compacting the table.
   1504    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
   1505    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
   1506 
   1507    U_ASSERT(fSafeTable == nullptr);
   1508    LocalPointer<UVector> lpSafeTable(
   1509        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status), status);
   1510    if (U_FAILURE(status)) {
   1511        return;
   1512    }
   1513    fSafeTable = lpSafeTable.orphan();
   1514    for (int32_t row=0; row<numCharClasses + 2; ++row) {
   1515        LocalPointer<UnicodeString> lpString(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
   1516        fSafeTable->adoptElement(lpString.orphan(), status);
   1517    }
   1518    if (U_FAILURE(status)) {
   1519        return;
   1520    }
   1521 
   1522    // From the start state, each input char class transitions to the state for that input.
   1523    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
   1524    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
   1525        // Note: +2 for the start & stop state.
   1526        startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
   1527    }
   1528 
   1529    // Initially make every other state table row look like the start state row,
   1530    for (int32_t row=2; row<numCharClasses+2; ++row) {
   1531        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
   1532        rowState = startState;   // UnicodeString assignment, copies contents.
   1533    }
   1534 
   1535    // Run through the safe pairs, set the next state to zero when pair has been seen.
   1536    // Zero being the stop state, meaning we found a safe point.
   1537    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
   1538        int32_t c1 = safePairs.charAt(pairIdx);
   1539        int32_t c2 = safePairs.charAt(pairIdx + 1);
   1540 
   1541        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
   1542        rowState.setCharAt(c1, 0);
   1543    }
   1544 
   1545    // Remove duplicate or redundant rows from the table.
   1546    IntPair states = {1, 0};
   1547    while (findDuplicateSafeState(&states)) {
   1548        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
   1549        removeSafeState(states);
   1550    }
   1551 }
   1552 
   1553 
   1554 //-----------------------------------------------------------------------------
   1555 //
   1556 //   getSafeTableSize()    Calculate the size of the runtime form of this
   1557 //                         safe state table.
   1558 //
   1559 //-----------------------------------------------------------------------------
   1560 int32_t  RBBITableBuilder::getSafeTableSize() const {
   1561    int32_t    size = 0;
   1562    int32_t    numRows;
   1563    int32_t    numCols;
   1564    int32_t    rowSize;
   1565 
   1566    if (fSafeTable == nullptr) {
   1567        return 0;
   1568    }
   1569 
   1570    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
   1571 
   1572    numRows = fSafeTable->size();
   1573    numCols = fRB->fSetBuilder->getNumCharCategories();
   1574 
   1575    if (use8BitsForSafeTable()) {
   1576        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
   1577    } else {
   1578        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
   1579    }
   1580    size   += numRows * rowSize;
   1581    return size;
   1582 }
   1583 
   1584 bool RBBITableBuilder::use8BitsForSafeTable() const {
   1585    return fSafeTable->size() <= kMaxStateFor8BitsTable;
   1586 }
   1587 
   1588 //-----------------------------------------------------------------------------
   1589 //
   1590 //   exportSafeTable()   export the state transition table in the format required
   1591 //                       by the runtime engine.  getTableSize() bytes of memory
   1592 //                       must be available at the output address "where".
   1593 //
   1594 //-----------------------------------------------------------------------------
   1595 void RBBITableBuilder::exportSafeTable(void *where) {
   1596    RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
   1597    uint32_t           state;
   1598    int                col;
   1599 
   1600    if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
   1601        return;
   1602    }
   1603 
   1604    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
   1605    if (catCount > 0x7fff ||
   1606            fSafeTable->size() > 0x7fff) {
   1607        *fStatus = U_BRK_INTERNAL_ERROR;
   1608        return;
   1609    }
   1610 
   1611    table->fNumStates = fSafeTable->size();
   1612    table->fFlags     = 0;
   1613    if (use8BitsForSafeTable()) {
   1614        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
   1615        table->fFlags  |= RBBI_8BITS_ROWS;
   1616    } else {
   1617        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
   1618    }
   1619 
   1620    for (state=0; state<table->fNumStates; state++) {
   1621        UnicodeString* rowString = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
   1622        RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
   1623        if (use8BitsForSafeTable()) {
   1624            RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
   1625            r8->fAccepting = 0;
   1626            r8->fLookAhead = 0;
   1627            r8->fTagsIdx    = 0;
   1628            for (col=0; col<catCount; col++) {
   1629                U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
   1630                r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));
   1631            }
   1632        } else {
   1633            row->r16.fAccepting = 0;
   1634            row->r16.fLookAhead = 0;
   1635            row->r16.fTagsIdx    = 0;
   1636            for (col=0; col<catCount; col++) {
   1637                row->r16.fNextState[col] = rowString->charAt(col);
   1638            }
   1639        }
   1640    }
   1641 }
   1642 
   1643 
   1644 
   1645 
   1646 //-----------------------------------------------------------------------------
   1647 //
   1648 //   printSet    Debug function.   Print the contents of a UVector
   1649 //
   1650 //-----------------------------------------------------------------------------
   1651 #ifdef RBBI_DEBUG
   1652 void RBBITableBuilder::printSet(UVector *s) {
   1653    int32_t  i;
   1654    for (i=0; i<s->size(); i++) {
   1655        const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
   1656        RBBIDebugPrintf("%5d", v==nullptr? -1 : v->fSerialNum);
   1657    }
   1658    RBBIDebugPrintf("\n");
   1659 }
   1660 #endif
   1661 
   1662 
   1663 //-----------------------------------------------------------------------------
   1664 //
   1665 //   printStates    Debug Function.  Dump the fully constructed state transition table.
   1666 //
   1667 //-----------------------------------------------------------------------------
   1668 #ifdef RBBI_DEBUG
   1669 void RBBITableBuilder::printStates() {
   1670    int     c;    // input "character"
   1671    int     n;    // state number
   1672 
   1673    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
   1674    RBBIDebugPrintf("      | Acc  LA    Tag");
   1675    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1676        RBBIDebugPrintf(" %3d", c);
   1677    }
   1678    RBBIDebugPrintf("\n");
   1679    RBBIDebugPrintf("      |---------------");
   1680    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1681        RBBIDebugPrintf("----");
   1682    }
   1683    RBBIDebugPrintf("\n");
   1684 
   1685    for (n=0; n<fDStates->size(); n++) {
   1686        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
   1687        RBBIDebugPrintf("  %3d | " , n);
   1688        RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
   1689        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1690            RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c));
   1691        }
   1692        RBBIDebugPrintf("\n");
   1693    }
   1694    RBBIDebugPrintf("\n\n");
   1695 }
   1696 #endif
   1697 
   1698 
   1699 //-----------------------------------------------------------------------------
   1700 //
   1701 //   printSafeTable    Debug Function.  Dump the fully constructed safe table.
   1702 //
   1703 //-----------------------------------------------------------------------------
   1704 #ifdef RBBI_DEBUG
   1705 void RBBITableBuilder::printReverseTable() {
   1706    int     c;    // input "character"
   1707    int     n;    // state number
   1708 
   1709    RBBIDebugPrintf("    Safe Reverse Table \n");
   1710    if (fSafeTable == nullptr) {
   1711        RBBIDebugPrintf("   --- nullptr ---\n");
   1712        return;
   1713    }
   1714    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
   1715    RBBIDebugPrintf("      | Acc  LA    Tag");
   1716    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1717        RBBIDebugPrintf(" %2d", c);
   1718    }
   1719    RBBIDebugPrintf("\n");
   1720    RBBIDebugPrintf("      |---------------");
   1721    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1722        RBBIDebugPrintf("---");
   1723    }
   1724    RBBIDebugPrintf("\n");
   1725 
   1726    for (n=0; n<fSafeTable->size(); n++) {
   1727        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
   1728        RBBIDebugPrintf("  %3d | " , n);
   1729        RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
   1730        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
   1731            RBBIDebugPrintf(" %2d", rowString->charAt(c));
   1732        }
   1733        RBBIDebugPrintf("\n");
   1734    }
   1735    RBBIDebugPrintf("\n\n");
   1736 }
   1737 #endif
   1738 
   1739 
   1740 
   1741 //-----------------------------------------------------------------------------
   1742 //
   1743 //   printRuleStatusTable    Debug Function.  Dump the common rule status table
   1744 //
   1745 //-----------------------------------------------------------------------------
   1746 #ifdef RBBI_DEBUG
   1747 void RBBITableBuilder::printRuleStatusTable() {
   1748    int32_t  thisRecord = 0;
   1749    int32_t  nextRecord = 0;
   1750    int      i;
   1751    UVector  *tbl = fRB->fRuleStatusVals;
   1752 
   1753    RBBIDebugPrintf("index |  tags \n");
   1754    RBBIDebugPrintf("-------------------\n");
   1755 
   1756    while (nextRecord < tbl->size()) {
   1757        thisRecord = nextRecord;
   1758        nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
   1759        RBBIDebugPrintf("%4d   ", thisRecord);
   1760        for (i=thisRecord+1; i<nextRecord; i++) {
   1761            RBBIDebugPrintf("  %5d", tbl->elementAti(i));
   1762        }
   1763        RBBIDebugPrintf("\n");
   1764    }
   1765    RBBIDebugPrintf("\n\n");
   1766 }
   1767 #endif
   1768 
   1769 
   1770 //-----------------------------------------------------------------------------
   1771 //
   1772 //   RBBIStateDescriptor     Methods.  This is a very struct-like class
   1773 //                           Most access is directly to the fields.
   1774 //
   1775 //-----------------------------------------------------------------------------
   1776 
   1777 RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
   1778    fMarked    = false;
   1779    fAccepting = 0;
   1780    fLookAhead = 0;
   1781    fTagsIdx   = 0;
   1782    fTagVals   = nullptr;
   1783    fPositions = nullptr;
   1784    fDtran     = nullptr;
   1785 
   1786    fDtran     = new UVector32(lastInputSymbol+1, *fStatus);
   1787    if (U_FAILURE(*fStatus)) {
   1788        return;
   1789    }
   1790    if (fDtran == nullptr) {
   1791        *fStatus = U_MEMORY_ALLOCATION_ERROR;
   1792        return;
   1793    }
   1794    fDtran->setSize(lastInputSymbol+1);    // fDtran needs to be pre-sized.
   1795                                           //   It is indexed by input symbols, and will
   1796                                           //   hold  the next state number for each
   1797                                           //   symbol.
   1798 }
   1799 
   1800 
   1801 RBBIStateDescriptor::~RBBIStateDescriptor() {
   1802    delete       fPositions;
   1803    delete       fDtran;
   1804    delete       fTagVals;
   1805    fPositions = nullptr;
   1806    fDtran     = nullptr;
   1807    fTagVals   = nullptr;
   1808 }
   1809 
   1810 U_NAMESPACE_END
   1811 
   1812 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */