rbbinode.cpp (16502B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 *************************************************************************** 5 * Copyright (C) 2002-2016 International Business Machines Corporation * 6 * and others. All rights reserved. * 7 *************************************************************************** 8 */ 9 10 // 11 // File: rbbinode.cpp 12 // 13 // Implementation of class RBBINode, which represents a node in the 14 // tree generated when parsing the Rules Based Break Iterator rules. 15 // 16 // This "Class" is actually closer to a struct. 17 // Code using it is expected to directly access fields much of the time. 18 // 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_BREAK_ITERATION 23 24 #include "unicode/unistr.h" 25 #include "unicode/uniset.h" 26 #include "unicode/uchar.h" 27 #include "unicode/parsepos.h" 28 29 #include "cstr.h" 30 #include "uvector.h" 31 32 #include "rbbirb.h" 33 #include "rbbinode.h" 34 35 #include "uassert.h" 36 37 38 U_NAMESPACE_BEGIN 39 40 #ifdef RBBI_DEBUG 41 static int gLastSerial = 0; 42 #endif 43 44 45 //------------------------------------------------------------------------- 46 // 47 // Constructor. Just set the fields to reasonable default values. 48 // 49 //------------------------------------------------------------------------- 50 RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() { 51 if (U_FAILURE(status)) { 52 return; 53 } 54 #ifdef RBBI_DEBUG 55 fSerialNum = ++gLastSerial; 56 #endif 57 fType = t; 58 fParent = nullptr; 59 fLeftChild = nullptr; 60 fRightChild = nullptr; 61 fInputSet = nullptr; 62 fFirstPos = 0; 63 fLastPos = 0; 64 fNullable = false; 65 fLookAheadEnd = false; 66 fRuleRoot = false; 67 fChainIn = false; 68 fVal = 0; 69 fPrecedence = precZero; 70 71 fFirstPosSet = new UVector(status); 72 fLastPosSet = new UVector(status); 73 fFollowPos = new UVector(status); 74 if (U_SUCCESS(status) && 75 (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) { 76 status = U_MEMORY_ALLOCATION_ERROR; 77 } 78 if (t==opCat) {fPrecedence = precOpCat;} 79 else if (t==opOr) {fPrecedence = precOpOr;} 80 else if (t==opStart) {fPrecedence = precStart;} 81 else if (t==opLParen) {fPrecedence = precLParen;} 82 83 } 84 85 86 RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) { 87 if (U_FAILURE(status)) { 88 return; 89 } 90 #ifdef RBBI_DEBUG 91 fSerialNum = ++gLastSerial; 92 #endif 93 fType = other.fType; 94 fParent = nullptr; 95 fLeftChild = nullptr; 96 fRightChild = nullptr; 97 fInputSet = other.fInputSet; 98 fPrecedence = other.fPrecedence; 99 fText = other.fText; 100 fFirstPos = other.fFirstPos; 101 fLastPos = other.fLastPos; 102 fNullable = other.fNullable; 103 fVal = other.fVal; 104 fRuleRoot = false; 105 fChainIn = other.fChainIn; 106 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 107 fLastPosSet = new UVector(status); 108 fFollowPos = new UVector(status); 109 if (U_SUCCESS(status) && 110 (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) { 111 status = U_MEMORY_ALLOCATION_ERROR; 112 } 113 } 114 115 116 //------------------------------------------------------------------------- 117 // 118 // Destructor. Deletes both this node AND any child nodes, 119 // except in the case of variable reference nodes. For 120 // these, the l. child points back to the definition, which 121 // is common for all references to the variable, meaning 122 // it can't be deleted here. 123 // 124 //------------------------------------------------------------------------- 125 RBBINode::~RBBINode() { 126 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); 127 delete fInputSet; 128 fInputSet = nullptr; 129 130 switch (this->fType) { 131 case varRef: 132 case setRef: 133 // for these node types, multiple instances point to the same "children" 134 // Storage ownership of children handled elsewhere. Don't delete here. 135 break; 136 137 default: 138 // Avoid using a recursive implementation because of stack overflow problems. 139 // See bug ICU-22584. 140 // delete fLeftChild; 141 NRDeleteNode(fLeftChild); 142 fLeftChild = nullptr; 143 // delete fRightChild; 144 NRDeleteNode(fRightChild); 145 fRightChild = nullptr; 146 } 147 148 delete fFirstPosSet; 149 delete fLastPosSet; 150 delete fFollowPos; 151 } 152 153 /** 154 * Non-recursive delete of a node + its children. Used from the node destructor 155 * instead of the more obvious recursive implementation to avoid problems with 156 * stack overflow with some perverse test rule data (from fuzzing). 157 */ 158 void RBBINode::NRDeleteNode(RBBINode *node) { 159 if (node == nullptr) { 160 return; 161 } 162 163 RBBINode *stopNode = node->fParent; 164 RBBINode *nextNode = node; 165 while (nextNode != stopNode && nextNode != nullptr) { 166 RBBINode *currentNode = nextNode; 167 168 if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) || 169 currentNode->fType == varRef || // varRef and setRef nodes do not 170 currentNode->fType == setRef) { // own their children nodes. 171 // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it. 172 nextNode = currentNode->fParent; 173 if (nextNode) { 174 if (nextNode->fLeftChild == currentNode) { 175 nextNode->fLeftChild = nullptr; 176 } else if (nextNode->fRightChild == currentNode) { 177 nextNode->fRightChild = nullptr; 178 } 179 } 180 delete currentNode; 181 } else if (currentNode->fLeftChild) { 182 nextNode = currentNode->fLeftChild; 183 if (nextNode->fParent == nullptr) { 184 nextNode->fParent = currentNode; 185 // fParent isn't always set; do it now if not. 186 } 187 U_ASSERT(nextNode->fParent == currentNode); 188 } else if (currentNode->fRightChild) { 189 nextNode = currentNode->fRightChild; 190 if (nextNode->fParent == nullptr) { 191 nextNode->fParent = currentNode; 192 // fParent isn't always set; do it now if not. 193 } 194 U_ASSERT(nextNode->fParent == currentNode); 195 } 196 } 197 } 198 199 //------------------------------------------------------------------------- 200 // 201 // cloneTree Make a copy of the subtree rooted at this node. 202 // Discard any variable references encountered along the way, 203 // and replace with copies of the variable's definitions. 204 // Used to replicate the expression underneath variable 205 // references in preparation for generating the DFA tables. 206 // 207 //------------------------------------------------------------------------- 208 constexpr int kRecursiveDepthLimit = 3500; 209 RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) { 210 if (U_FAILURE(status)) { 211 return nullptr; 212 } 213 // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR 214 // to avoid stack overflow crash. 215 if (depth > kRecursiveDepthLimit) { 216 status = U_INPUT_TOO_LONG_ERROR; 217 return nullptr; 218 } 219 RBBINode *n; 220 221 if (fType == RBBINode::varRef) { 222 // If the current node is a variable reference, skip over it 223 // and clone the definition of the variable instead. 224 n = fLeftChild->cloneTree(status, depth+1); 225 if (U_FAILURE(status)) { 226 return nullptr; 227 } 228 } else if (fType == RBBINode::uset) { 229 n = this; 230 } else { 231 n = new RBBINode(*this, status); 232 if (U_FAILURE(status)) { 233 delete n; 234 return nullptr; 235 } 236 // Check for null pointer. 237 if (n == nullptr) { 238 status = U_MEMORY_ALLOCATION_ERROR; 239 return nullptr; 240 } 241 if (fLeftChild != nullptr) { 242 n->fLeftChild = fLeftChild->cloneTree(status, depth+1); 243 if (U_FAILURE(status)) { 244 delete n; 245 return nullptr; 246 } 247 n->fLeftChild->fParent = n; 248 } 249 if (fRightChild != nullptr) { 250 n->fRightChild = fRightChild->cloneTree(status, depth+1); 251 if (U_FAILURE(status)) { 252 delete n; 253 return nullptr; 254 } 255 n->fRightChild->fParent = n; 256 } 257 } 258 return n; 259 } 260 261 262 263 //------------------------------------------------------------------------- 264 // 265 // flattenVariables Walk a parse tree, replacing any variable 266 // references with a copy of the variable's definition. 267 // Aside from variables, the tree is not changed. 268 // 269 // Return the root of the tree. If the root was not a variable 270 // reference, it remains unchanged - the root we started with 271 // is the root we return. If, however, the root was a variable 272 // reference, the root of the newly cloned replacement tree will 273 // be returned, and the original tree deleted. 274 // 275 // This function works by recursively walking the tree 276 // without doing anything until a variable reference is 277 // found, then calling cloneTree() at that point. Any 278 // nested references are handled by cloneTree(), not here. 279 // 280 //------------------------------------------------------------------------- 281 RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) { 282 if (U_FAILURE(status)) { 283 return this; 284 } 285 // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR 286 // to avoid stack overflow crash. 287 if (depth > kRecursiveDepthLimit) { 288 status = U_INPUT_TOO_LONG_ERROR; 289 return this; 290 } 291 if (fType == varRef) { 292 RBBINode *retNode = fLeftChild->cloneTree(status, depth+1); 293 if (U_FAILURE(status)) { 294 return this; 295 } 296 retNode->fRuleRoot = this->fRuleRoot; 297 retNode->fChainIn = this->fChainIn; 298 delete this; // TODO: undefined behavior. Fix. 299 return retNode; 300 } 301 302 if (fLeftChild != nullptr) { 303 fLeftChild = fLeftChild->flattenVariables(status, depth+1); 304 if (fLeftChild == nullptr) { 305 status = U_MEMORY_ALLOCATION_ERROR; 306 } 307 if (U_FAILURE(status)) { 308 return this; 309 } 310 fLeftChild->fParent = this; 311 } 312 if (fRightChild != nullptr) { 313 fRightChild = fRightChild->flattenVariables(status, depth+1); 314 if (fRightChild == nullptr) { 315 status = U_MEMORY_ALLOCATION_ERROR; 316 } 317 if (U_FAILURE(status)) { 318 return this; 319 } 320 fRightChild->fParent = this; 321 } 322 return this; 323 } 324 325 326 //------------------------------------------------------------------------- 327 // 328 // flattenSets Walk the parse tree, replacing any nodes of type setRef 329 // with a copy of the expression tree for the set. A set's 330 // equivalent expression tree is precomputed and saved as 331 // the left child of the uset node. 332 // 333 //------------------------------------------------------------------------- 334 void RBBINode::flattenSets(UErrorCode &status, int depth) { 335 if (U_FAILURE(status)) { 336 return; 337 } 338 // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR 339 // to avoid stack overflow crash. 340 if (depth > kRecursiveDepthLimit) { 341 status = U_INPUT_TOO_LONG_ERROR; 342 return; 343 } 344 U_ASSERT(fType != setRef); 345 346 if (fLeftChild != nullptr) { 347 if (fLeftChild->fType==setRef) { 348 RBBINode *setRefNode = fLeftChild; 349 RBBINode *usetNode = setRefNode->fLeftChild; 350 RBBINode *replTree = usetNode->fLeftChild; 351 fLeftChild = replTree->cloneTree(status, depth+1); 352 if (U_FAILURE(status)) { 353 delete setRefNode; 354 return; 355 } 356 fLeftChild->fParent = this; 357 delete setRefNode; 358 } else { 359 fLeftChild->flattenSets(status, depth+1); 360 } 361 } 362 363 if (fRightChild != nullptr) { 364 if (fRightChild->fType==setRef) { 365 RBBINode *setRefNode = fRightChild; 366 RBBINode *usetNode = setRefNode->fLeftChild; 367 RBBINode *replTree = usetNode->fLeftChild; 368 fRightChild = replTree->cloneTree(status, depth+1); 369 if (U_FAILURE(status)) { 370 delete setRefNode; 371 return; 372 } 373 fRightChild->fParent = this; 374 delete setRefNode; 375 } else { 376 fRightChild->flattenSets(status, depth+1); 377 } 378 } 379 } 380 381 382 383 //------------------------------------------------------------------------- 384 // 385 // findNodes() Locate all the nodes of the specified type, starting 386 // at the specified root. 387 // 388 //------------------------------------------------------------------------- 389 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { 390 /* test for buffer overflows */ 391 if (U_FAILURE(status)) { 392 return; 393 } 394 U_ASSERT(!dest->hasDeleter()); 395 if (fType == kind) { 396 dest->addElement(this, status); 397 } 398 if (fLeftChild != nullptr) { 399 fLeftChild->findNodes(dest, kind, status); 400 } 401 if (fRightChild != nullptr) { 402 fRightChild->findNodes(dest, kind, status); 403 } 404 } 405 406 407 //------------------------------------------------------------------------- 408 // 409 // print. Print out a single node, for debugging. 410 // 411 //------------------------------------------------------------------------- 412 #ifdef RBBI_DEBUG 413 414 static int32_t serial(const RBBINode *node) { 415 return (node == nullptr? -1 : node->fSerialNum); 416 } 417 418 419 void RBBINode::printNode(const RBBINode *node) { 420 static const char * const nodeTypeNames[] = { 421 "setRef", 422 "uset", 423 "varRef", 424 "leafChar", 425 "lookAhead", 426 "tag", 427 "endMark", 428 "opStart", 429 "opCat", 430 "opOr", 431 "opStar", 432 "opPlus", 433 "opQuestion", 434 "opBreak", 435 "opReverse", 436 "opLParen" 437 }; 438 439 if (node==nullptr) { 440 RBBIDebugPrintf("%10p", (void *)node); 441 } else { 442 RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", 443 (void *)node, node->fSerialNum, nodeTypeNames[node->fType], 444 node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', 445 serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), 446 node->fFirstPos, node->fVal); 447 if (node->fType == varRef) { 448 RBBI_DEBUG_printUnicodeString(node->fText); 449 } 450 } 451 RBBIDebugPrintf("\n"); 452 } 453 #endif 454 455 456 #ifdef RBBI_DEBUG 457 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { 458 RBBIDebugPrintf("%*s", minWidth, CStr(s)()); 459 } 460 #endif 461 462 463 //------------------------------------------------------------------------- 464 // 465 // print. Print out the tree of nodes rooted at "this" 466 // 467 //------------------------------------------------------------------------- 468 #ifdef RBBI_DEBUG 469 void RBBINode::printNodeHeader() { 470 RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); 471 } 472 473 void RBBINode::printTree(const RBBINode *node, UBool printHeading) { 474 if (printHeading) { 475 printNodeHeader(); 476 } 477 printNode(node); 478 if (node != nullptr) { 479 // Only dump the definition under a variable reference if asked to. 480 // Unconditionally dump children of all other node types. 481 if (node->fType != varRef) { 482 if (node->fLeftChild != nullptr) { 483 printTree(node->fLeftChild, false); 484 } 485 486 if (node->fRightChild != nullptr) { 487 printTree(node->fRightChild, false); 488 } 489 } 490 } 491 } 492 #endif 493 494 495 496 U_NAMESPACE_END 497 498 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */