tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbbinode.cpp (16502B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 *   Copyright (C) 2002-2016 International Business Machines Corporation   *
      6 *   and others. All rights reserved.                                      *
      7 ***************************************************************************
      8 */
      9 
     10 //
     11 //  File:  rbbinode.cpp
     12 //
     13 //         Implementation of class RBBINode, which represents a node in the
     14 //         tree generated when parsing the Rules Based Break Iterator rules.
     15 //
     16 //         This "Class" is actually closer to a struct.
     17 //         Code using it is expected to directly access fields much of the time.
     18 //
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_BREAK_ITERATION
     23 
     24 #include "unicode/unistr.h"
     25 #include "unicode/uniset.h"
     26 #include "unicode/uchar.h"
     27 #include "unicode/parsepos.h"
     28 
     29 #include "cstr.h"
     30 #include "uvector.h"
     31 
     32 #include "rbbirb.h"
     33 #include "rbbinode.h"
     34 
     35 #include "uassert.h"
     36 
     37 
     38 U_NAMESPACE_BEGIN
     39 
     40 #ifdef RBBI_DEBUG
     41 static int  gLastSerial = 0;
     42 #endif
     43 
     44 
     45 //-------------------------------------------------------------------------
     46 //
     47 //    Constructor.   Just set the fields to reasonable default values.
     48 //
     49 //-------------------------------------------------------------------------
     50 RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() {
     51    if (U_FAILURE(status)) {
     52        return;
     53    }
     54 #ifdef RBBI_DEBUG
     55    fSerialNum    = ++gLastSerial;
     56 #endif
     57    fType         = t;
     58    fParent       = nullptr;
     59    fLeftChild    = nullptr;
     60    fRightChild   = nullptr;
     61    fInputSet     = nullptr;
     62    fFirstPos     = 0;
     63    fLastPos      = 0;
     64    fNullable     = false;
     65    fLookAheadEnd = false;
     66    fRuleRoot     = false;
     67    fChainIn      = false;
     68    fVal          = 0;
     69    fPrecedence   = precZero;
     70 
     71    fFirstPosSet  = new UVector(status);
     72    fLastPosSet   = new UVector(status);
     73    fFollowPos    = new UVector(status);
     74    if (U_SUCCESS(status) &&
     75        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
     76        status =  U_MEMORY_ALLOCATION_ERROR;
     77    }
     78    if      (t==opCat)    {fPrecedence = precOpCat;}
     79    else if (t==opOr)     {fPrecedence = precOpOr;}
     80    else if (t==opStart)  {fPrecedence = precStart;}
     81    else if (t==opLParen) {fPrecedence = precLParen;}
     82 
     83 }
     84 
     85 
     86 RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) {
     87    if (U_FAILURE(status)) {
     88        return;
     89    }
     90 #ifdef RBBI_DEBUG
     91    fSerialNum   = ++gLastSerial;
     92 #endif
     93    fType        = other.fType;
     94    fParent      = nullptr;
     95    fLeftChild   = nullptr;
     96    fRightChild  = nullptr;
     97    fInputSet    = other.fInputSet;
     98    fPrecedence  = other.fPrecedence;
     99    fText        = other.fText;
    100    fFirstPos    = other.fFirstPos;
    101    fLastPos     = other.fLastPos;
    102    fNullable    = other.fNullable;
    103    fVal         = other.fVal;
    104    fRuleRoot    = false;
    105    fChainIn     = other.fChainIn;
    106    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
    107    fLastPosSet  = new UVector(status);
    108    fFollowPos   = new UVector(status);
    109    if (U_SUCCESS(status) &&
    110        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
    111        status =  U_MEMORY_ALLOCATION_ERROR;
    112    }
    113 }
    114 
    115 
    116 //-------------------------------------------------------------------------
    117 //
    118 //    Destructor.   Deletes both this node AND any child nodes,
    119 //                  except in the case of variable reference nodes.  For
    120 //                  these, the l. child points back to the definition, which
    121 //                  is common for all references to the variable, meaning
    122 //                  it can't be deleted here.
    123 //
    124 //-------------------------------------------------------------------------
    125 RBBINode::~RBBINode() {
    126    // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
    127    delete fInputSet;
    128    fInputSet = nullptr;
    129 
    130    switch (this->fType) {
    131    case varRef:
    132    case setRef:
    133        // for these node types, multiple instances point to the same "children"
    134        // Storage ownership of children handled elsewhere.  Don't delete here.
    135        break;
    136 
    137    default:
    138        // Avoid using a recursive implementation because of stack overflow problems.
    139        // See bug ICU-22584.
    140        // delete        fLeftChild;
    141        NRDeleteNode(fLeftChild);
    142        fLeftChild =   nullptr;
    143        // delete        fRightChild;
    144        NRDeleteNode(fRightChild);
    145        fRightChild = nullptr;
    146    }
    147 
    148    delete fFirstPosSet;
    149    delete fLastPosSet;
    150    delete fFollowPos;
    151 }
    152 
    153 /**
    154 * Non-recursive delete of a node + its children. Used from the node destructor
    155 * instead of the more obvious recursive implementation to avoid problems with
    156 * stack overflow with some perverse test rule data (from fuzzing).
    157 */
    158 void RBBINode::NRDeleteNode(RBBINode *node) {
    159    if (node == nullptr) {
    160        return;
    161    }
    162 
    163    RBBINode *stopNode = node->fParent;
    164    RBBINode *nextNode = node;
    165    while (nextNode != stopNode && nextNode != nullptr) {
    166        RBBINode *currentNode = nextNode;
    167 
    168        if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
    169                currentNode->fType == varRef ||      // varRef and setRef nodes do not
    170                currentNode->fType == setRef) {      // own their children nodes.
    171            // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
    172            nextNode = currentNode->fParent;
    173            if (nextNode) {
    174                if (nextNode->fLeftChild == currentNode) {
    175                    nextNode->fLeftChild = nullptr;
    176                } else if (nextNode->fRightChild == currentNode) {
    177                    nextNode->fRightChild = nullptr;
    178                }
    179            }
    180            delete currentNode;
    181        } else if (currentNode->fLeftChild) {
    182            nextNode = currentNode->fLeftChild;
    183            if (nextNode->fParent == nullptr) {
    184                nextNode->fParent = currentNode;
    185                // fParent isn't always set; do it now if not.
    186            }
    187            U_ASSERT(nextNode->fParent == currentNode);
    188        } else if (currentNode->fRightChild) {
    189            nextNode = currentNode->fRightChild;
    190            if (nextNode->fParent == nullptr) {
    191                nextNode->fParent = currentNode;
    192                // fParent isn't always set; do it now if not.
    193            }
    194            U_ASSERT(nextNode->fParent == currentNode);
    195        }
    196    }
    197 }
    198 
    199 //-------------------------------------------------------------------------
    200 //
    201 //    cloneTree     Make a copy of the subtree rooted at this node.
    202 //                  Discard any variable references encountered along the way,
    203 //                  and replace with copies of the variable's definitions.
    204 //                  Used to replicate the expression underneath variable
    205 //                  references in preparation for generating the DFA tables.
    206 //
    207 //-------------------------------------------------------------------------
    208 constexpr int kRecursiveDepthLimit = 3500;
    209 RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) {
    210    if (U_FAILURE(status)) {
    211        return nullptr;
    212    }
    213    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
    214    // to avoid stack overflow crash.
    215    if (depth > kRecursiveDepthLimit) {
    216        status = U_INPUT_TOO_LONG_ERROR;
    217        return nullptr;
    218    }
    219    RBBINode    *n;
    220 
    221    if (fType == RBBINode::varRef) {
    222        // If the current node is a variable reference, skip over it
    223        //   and clone the definition of the variable instead.
    224        n = fLeftChild->cloneTree(status, depth+1);
    225        if (U_FAILURE(status)) {
    226            return nullptr;
    227        }
    228    } else if (fType == RBBINode::uset) {
    229        n = this;
    230    } else {
    231        n = new RBBINode(*this, status);
    232        if (U_FAILURE(status)) {
    233            delete n;
    234            return nullptr;
    235        }
    236        // Check for null pointer.
    237        if (n == nullptr) {
    238            status =  U_MEMORY_ALLOCATION_ERROR;
    239            return nullptr;
    240        }
    241        if (fLeftChild != nullptr) {
    242            n->fLeftChild          = fLeftChild->cloneTree(status, depth+1);
    243            if (U_FAILURE(status)) {
    244                delete n;
    245                return nullptr;
    246            }
    247            n->fLeftChild->fParent = n;
    248        }
    249        if (fRightChild != nullptr) {
    250            n->fRightChild          = fRightChild->cloneTree(status, depth+1);
    251            if (U_FAILURE(status)) {
    252                delete n;
    253                return nullptr;
    254            }
    255            n->fRightChild->fParent = n;
    256        }
    257    }
    258    return n;
    259 }
    260 
    261 
    262 
    263 //-------------------------------------------------------------------------
    264 //
    265 //   flattenVariables   Walk a parse tree, replacing any variable
    266 //                      references with a copy of the variable's definition.
    267 //                      Aside from variables, the tree is not changed.
    268 //
    269 //                      Return the root of the tree.  If the root was not a variable
    270 //                      reference, it remains unchanged - the root we started with
    271 //                      is the root we return.  If, however, the root was a variable
    272 //                      reference, the root of the newly cloned replacement tree will
    273 //                      be returned, and the original tree deleted.
    274 //
    275 //                      This function works by recursively walking the tree
    276 //                      without doing anything until a variable reference is
    277 //                      found, then calling cloneTree() at that point.  Any
    278 //                      nested references are handled by cloneTree(), not here.
    279 //
    280 //-------------------------------------------------------------------------
    281 RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
    282    if (U_FAILURE(status)) {
    283        return this;
    284    }
    285    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
    286    // to avoid stack overflow crash.
    287    if (depth > kRecursiveDepthLimit) {
    288        status = U_INPUT_TOO_LONG_ERROR;
    289        return this;
    290    }
    291    if (fType == varRef) {
    292        RBBINode *retNode  = fLeftChild->cloneTree(status, depth+1);
    293        if (U_FAILURE(status)) {
    294            return this;
    295        }
    296        retNode->fRuleRoot = this->fRuleRoot;
    297        retNode->fChainIn  = this->fChainIn;
    298        delete this;   // TODO: undefined behavior. Fix.
    299        return retNode;
    300    }
    301 
    302    if (fLeftChild != nullptr) {
    303        fLeftChild = fLeftChild->flattenVariables(status, depth+1);
    304        if (fLeftChild == nullptr) {
    305            status = U_MEMORY_ALLOCATION_ERROR;
    306        }
    307        if (U_FAILURE(status)) {
    308            return this;
    309        }
    310        fLeftChild->fParent  = this;
    311    }
    312    if (fRightChild != nullptr) {
    313        fRightChild = fRightChild->flattenVariables(status, depth+1);
    314        if (fRightChild == nullptr) {
    315            status = U_MEMORY_ALLOCATION_ERROR;
    316        }
    317        if (U_FAILURE(status)) {
    318            return this;
    319        }
    320        fRightChild->fParent = this;
    321    }
    322    return this;
    323 }
    324 
    325 
    326 //-------------------------------------------------------------------------
    327 //
    328 //  flattenSets    Walk the parse tree, replacing any nodes of type setRef
    329 //                 with a copy of the expression tree for the set.  A set's
    330 //                 equivalent expression tree is precomputed and saved as
    331 //                 the left child of the uset node.
    332 //
    333 //-------------------------------------------------------------------------
    334 void RBBINode::flattenSets(UErrorCode &status, int depth) {
    335    if (U_FAILURE(status)) {
    336        return;
    337    }
    338    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
    339    // to avoid stack overflow crash.
    340    if (depth > kRecursiveDepthLimit) {
    341        status = U_INPUT_TOO_LONG_ERROR;
    342        return;
    343    }
    344    U_ASSERT(fType != setRef);
    345 
    346    if (fLeftChild != nullptr) {
    347        if (fLeftChild->fType==setRef) {
    348            RBBINode *setRefNode = fLeftChild;
    349            RBBINode *usetNode   = setRefNode->fLeftChild;
    350            RBBINode *replTree   = usetNode->fLeftChild;
    351            fLeftChild           = replTree->cloneTree(status, depth+1);
    352            if (U_FAILURE(status)) {
    353                delete setRefNode;
    354                return;
    355            }
    356            fLeftChild->fParent  = this;
    357            delete setRefNode;
    358        } else {
    359            fLeftChild->flattenSets(status, depth+1);
    360        }
    361    }
    362 
    363    if (fRightChild != nullptr) {
    364        if (fRightChild->fType==setRef) {
    365            RBBINode *setRefNode = fRightChild;
    366            RBBINode *usetNode   = setRefNode->fLeftChild;
    367            RBBINode *replTree   = usetNode->fLeftChild;
    368            fRightChild           = replTree->cloneTree(status, depth+1);
    369            if (U_FAILURE(status)) {
    370                delete setRefNode;
    371                return;
    372            }
    373            fRightChild->fParent  = this;
    374            delete setRefNode;
    375        } else {
    376            fRightChild->flattenSets(status, depth+1);
    377        }
    378    }
    379 }
    380 
    381 
    382 
    383 //-------------------------------------------------------------------------
    384 //
    385 //   findNodes()     Locate all the nodes of the specified type, starting
    386 //                   at the specified root.
    387 //
    388 //-------------------------------------------------------------------------
    389 void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    390    /* test for buffer overflows */
    391    if (U_FAILURE(status)) {
    392        return;
    393    }
    394    U_ASSERT(!dest->hasDeleter());
    395    if (fType == kind) {
    396        dest->addElement(this, status);
    397    }
    398    if (fLeftChild != nullptr) {
    399        fLeftChild->findNodes(dest, kind, status);
    400    }
    401    if (fRightChild != nullptr) {
    402        fRightChild->findNodes(dest, kind, status);
    403    }
    404 }
    405 
    406 
    407 //-------------------------------------------------------------------------
    408 //
    409 //    print.         Print out a single node, for debugging.
    410 //
    411 //-------------------------------------------------------------------------
    412 #ifdef RBBI_DEBUG
    413 
    414 static int32_t serial(const RBBINode *node) {
    415    return (node == nullptr? -1 : node->fSerialNum);
    416 }
    417 
    418 
    419 void RBBINode::printNode(const RBBINode *node) {
    420    static const char * const nodeTypeNames[] = {
    421                "setRef",
    422                "uset",
    423                "varRef",
    424                "leafChar",
    425                "lookAhead",
    426                "tag",
    427                "endMark",
    428                "opStart",
    429                "opCat",
    430                "opOr",
    431                "opStar",
    432                "opPlus",
    433                "opQuestion",
    434                "opBreak",
    435                "opReverse",
    436                "opLParen"
    437    };
    438 
    439    if (node==nullptr) {
    440        RBBIDebugPrintf("%10p", (void *)node);
    441    } else {
    442        RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
    443            (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
    444            node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
    445            serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
    446            node->fFirstPos, node->fVal);
    447        if (node->fType == varRef) {
    448            RBBI_DEBUG_printUnicodeString(node->fText);
    449        }
    450    }
    451    RBBIDebugPrintf("\n");
    452 }
    453 #endif
    454 
    455 
    456 #ifdef RBBI_DEBUG
    457 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
    458    RBBIDebugPrintf("%*s", minWidth, CStr(s)());
    459 }
    460 #endif
    461 
    462 
    463 //-------------------------------------------------------------------------
    464 //
    465 //    print.         Print out the tree of nodes rooted at "this"
    466 //
    467 //-------------------------------------------------------------------------
    468 #ifdef RBBI_DEBUG
    469 void RBBINode::printNodeHeader() {
    470    RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
    471 }
    472    
    473 void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
    474    if (printHeading) {
    475        printNodeHeader();
    476    }
    477    printNode(node);
    478    if (node != nullptr) {
    479        // Only dump the definition under a variable reference if asked to.
    480        // Unconditionally dump children of all other node types.
    481        if (node->fType != varRef) {
    482            if (node->fLeftChild != nullptr) {
    483                printTree(node->fLeftChild, false);
    484            }
    485            
    486            if (node->fRightChild != nullptr) {
    487                printTree(node->fRightChild, false);
    488            }
    489        }
    490    }
    491 }
    492 #endif
    493 
    494 
    495 
    496 U_NAMESPACE_END
    497 
    498 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */