transreg.cpp (51104B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2001-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 08/10/2001 aliu Creation. 10 ********************************************************************** 11 */ 12 13 #include "unicode/utypes.h" 14 #include "unicode/rep.h" 15 16 #if !UCONFIG_NO_TRANSLITERATION 17 18 #include "unicode/translit.h" 19 #include "unicode/resbund.h" 20 #include "unicode/uniset.h" 21 #include "unicode/uscript.h" 22 #include "rbt.h" 23 #include "cpdtrans.h" 24 #include "nultrans.h" 25 #include "transreg.h" 26 #include "rbt_data.h" 27 #include "rbt_pars.h" 28 #include "tridpars.h" 29 #include "charstr.h" 30 #include "uassert.h" 31 #include "locutil.h" 32 33 // Enable the following symbol to add debugging code that tracks the 34 // allocation, deletion, and use of Entry objects. BoundsChecker has 35 // reported dangling pointer errors with these objects, but I have 36 // been unable to confirm them. I suspect BoundsChecker is getting 37 // confused with pointers going into and coming out of a UHashtable, 38 // despite the hinting code that is designed to help it. 39 // #define DEBUG_MEM 40 #ifdef DEBUG_MEM 41 #include <stdio.h> 42 #endif 43 44 // char16_t constants 45 static const char16_t LOCALE_SEP = 95; // '_' 46 //static const char16_t ID_SEP = 0x002D; /*-*/ 47 //static const char16_t VARIANT_SEP = 0x002F; // '/' 48 49 // String constants 50 static const char16_t ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any 51 static const char16_t LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat 52 53 // empty string 54 #define NO_VARIANT UnicodeString() 55 56 // initial estimate for specDAG size 57 // ICU 60 Transliterator::countAvailableSources() 58 #define SPECDAG_INIT_SIZE 149 59 60 // initial estimate for number of variant names 61 #define VARIANT_LIST_INIT_SIZE 11 62 #define VARIANT_LIST_MAX_SIZE 31 63 64 // initial estimate for availableIDs count (default estimate is 8 => multiple reallocs) 65 // ICU 60 Transliterator::countAvailableIDs() 66 #define AVAILABLE_IDS_INIT_SIZE 641 67 68 // initial estimate for number of targets for source "Any", "Lat" 69 // ICU 60 Transliterator::countAvailableTargets("Any")/("Latn") 70 #define ANY_TARGETS_INIT_SIZE 125 71 #define LAT_TARGETS_INIT_SIZE 23 72 73 /** 74 * Resource bundle key for the RuleBasedTransliterator rule. 75 */ 76 //static const char RB_RULE[] = "Rule"; 77 78 U_NAMESPACE_BEGIN 79 80 //------------------------------------------------------------------ 81 // Alias 82 //------------------------------------------------------------------ 83 84 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID, 85 const UnicodeSet* cpdFilter) : 86 ID(), 87 aliasesOrRules(theAliasID), 88 transes(nullptr), 89 compoundFilter(cpdFilter), 90 direction(UTRANS_FORWARD), 91 type(TransliteratorAlias::SIMPLE) { 92 } 93 94 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, 95 const UnicodeString& idBlocks, 96 UVector* adoptedTransliterators, 97 const UnicodeSet* cpdFilter) : 98 ID(theID), 99 aliasesOrRules(idBlocks), 100 transes(adoptedTransliterators), 101 compoundFilter(cpdFilter), 102 direction(UTRANS_FORWARD), 103 type(TransliteratorAlias::COMPOUND) { 104 } 105 106 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, 107 const UnicodeString& rules, 108 UTransDirection dir) : 109 ID(theID), 110 aliasesOrRules(rules), 111 transes(nullptr), 112 compoundFilter(nullptr), 113 direction(dir), 114 type(TransliteratorAlias::RULES) { 115 } 116 117 TransliteratorAlias::~TransliteratorAlias() { 118 delete transes; 119 } 120 121 122 Transliterator* TransliteratorAlias::create(UParseError& pe, 123 UErrorCode& ec) { 124 if (U_FAILURE(ec)) { 125 return nullptr; 126 } 127 Transliterator *t = nullptr; 128 switch (type) { 129 case SIMPLE: 130 t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec); 131 if(U_FAILURE(ec)){ 132 return nullptr; 133 } 134 if (compoundFilter != nullptr) 135 t->adoptFilter(compoundFilter->clone()); 136 break; 137 case COMPOUND: 138 { 139 // the total number of transliterators in the compound is the total number of anonymous transliterators 140 // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID 141 // block and that each pair anonymous transliterators has an ID block between them. Then we go back 142 // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which 143 // marks the position where an anonymous transliterator goes) and adjust accordingly 144 int32_t anonymousRBTs = transes->size(); 145 UnicodeString noIDBlock(static_cast<char16_t>(0xffff)); 146 noIDBlock += static_cast<char16_t>(0xffff); 147 int32_t pos = aliasesOrRules.indexOf(noIDBlock); 148 while (pos >= 0) { 149 pos = aliasesOrRules.indexOf(noIDBlock, pos + 1); 150 } 151 152 UVector transliterators(uprv_deleteUObject, nullptr, ec); 153 UnicodeString idBlock; 154 int32_t blockSeparatorPos = aliasesOrRules.indexOf(static_cast<char16_t>(0xffff)); 155 while (blockSeparatorPos >= 0) { 156 aliasesOrRules.extract(0, blockSeparatorPos, idBlock); 157 aliasesOrRules.remove(0, blockSeparatorPos + 1); 158 if (!idBlock.isEmpty()) 159 transliterators.adoptElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); 160 if (!transes->isEmpty()) 161 transliterators.adoptElement(transes->orphanElementAt(0), ec); 162 blockSeparatorPos = aliasesOrRules.indexOf(static_cast<char16_t>(0xffff)); 163 } 164 if (!aliasesOrRules.isEmpty()) 165 transliterators.adoptElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); 166 while (!transes->isEmpty()) 167 transliterators.adoptElement(transes->orphanElementAt(0), ec); 168 transliterators.setDeleter(nullptr); 169 170 if (U_SUCCESS(ec)) { 171 t = new CompoundTransliterator(ID, transliterators, 172 (compoundFilter ? compoundFilter->clone() : nullptr), 173 anonymousRBTs, pe, ec); 174 if (t == nullptr) { 175 ec = U_MEMORY_ALLOCATION_ERROR; 176 return nullptr; 177 } 178 } else { 179 for (int32_t i = 0; i < transliterators.size(); i++) 180 delete static_cast<Transliterator*>(transliterators.elementAt(i)); 181 } 182 } 183 break; 184 case RULES: 185 UPRV_UNREACHABLE_EXIT; // don't call create() if isRuleBased() returns true! 186 } 187 return t; 188 } 189 190 UBool TransliteratorAlias::isRuleBased() const { 191 return type == RULES; 192 } 193 194 void TransliteratorAlias::parse(TransliteratorParser& parser, 195 UParseError& pe, UErrorCode& ec) const { 196 U_ASSERT(type == RULES); 197 if (U_FAILURE(ec)) { 198 return; 199 } 200 201 parser.parse(aliasesOrRules, direction, pe, ec); 202 } 203 204 //---------------------------------------------------------------------- 205 // class TransliteratorSpec 206 //---------------------------------------------------------------------- 207 208 /** 209 * A TransliteratorSpec is a string specifying either a source or a target. In more 210 * general terms, it may also specify a variant, but we only use the 211 * Spec class for sources and targets. 212 * 213 * A Spec may be a locale or a script. If it is a locale, it has a 214 * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where 215 * ssss is the script mapping of xx_YY_ZZZ. The Spec API methods 216 * hasFallback(), next(), and reset() iterate over this fallback 217 * sequence. 218 * 219 * The Spec class canonicalizes itself, so the locale is put into 220 * canonical form, or the script is transformed from an abbreviation 221 * to a full name. 222 */ 223 class TransliteratorSpec : public UMemory { 224 public: 225 TransliteratorSpec(const UnicodeString& spec); 226 ~TransliteratorSpec(); 227 228 const UnicodeString& get() const; 229 UBool hasFallback() const; 230 const UnicodeString& next(); 231 void reset(); 232 233 UBool isLocale() const; 234 ResourceBundle& getBundle() const; 235 236 operator const UnicodeString&() const { return get(); } 237 const UnicodeString& getTop() const { return top; } 238 239 private: 240 void setupNext(); 241 242 UnicodeString top; 243 UnicodeString spec; 244 UnicodeString nextSpec; 245 UnicodeString scriptName; 246 UBool isSpecLocale; // true if spec is a locale 247 UBool isNextLocale; // true if nextSpec is a locale 248 ResourceBundle* res; 249 250 TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class 251 TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class 252 }; 253 254 TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec) 255 : top(theSpec), 256 res(nullptr) 257 { 258 UErrorCode status = U_ZERO_ERROR; 259 Locale topLoc(""); 260 LocaleUtility::initLocaleFromName(theSpec, topLoc); 261 if (!topLoc.isBogus()) { 262 res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status); 263 /* test for nullptr */ 264 if (res == nullptr) { 265 return; 266 } 267 if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { 268 delete res; 269 res = nullptr; 270 } 271 } 272 273 // Canonicalize script name -or- do locale->script mapping 274 status = U_ZERO_ERROR; 275 static const int32_t capacity = 10; 276 UScriptCode script[capacity]={USCRIPT_INVALID_CODE}; 277 int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(), 278 script, capacity, &status); 279 if (num > 0 && script[0] != USCRIPT_INVALID_CODE) { 280 scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV); 281 } 282 283 // Canonicalize top 284 if (res != nullptr) { 285 // Canonicalize locale name 286 UnicodeString locStr; 287 LocaleUtility::initNameFromLocale(topLoc, locStr); 288 if (!locStr.isBogus()) { 289 top = locStr; 290 } 291 } else if (scriptName.length() != 0) { 292 // We are a script; use canonical name 293 top = scriptName; 294 } 295 296 // assert(spec != top); 297 reset(); 298 } 299 300 TransliteratorSpec::~TransliteratorSpec() { 301 delete res; 302 } 303 304 UBool TransliteratorSpec::hasFallback() const { 305 return nextSpec.length() != 0; 306 } 307 308 void TransliteratorSpec::reset() { 309 if (spec != top) { 310 spec = top; 311 isSpecLocale = (res != nullptr); 312 setupNext(); 313 } 314 } 315 316 void TransliteratorSpec::setupNext() { 317 isNextLocale = false; 318 if (isSpecLocale) { 319 nextSpec = spec; 320 int32_t i = nextSpec.lastIndexOf(LOCALE_SEP); 321 // If i == 0 then we have _FOO, so we fall through 322 // to the scriptName. 323 if (i > 0) { 324 nextSpec.truncate(i); 325 isNextLocale = true; 326 } else { 327 nextSpec = scriptName; // scriptName may be empty 328 } 329 } else { 330 // spec is a script, so we are at the end 331 nextSpec.truncate(0); 332 } 333 } 334 335 // Protocol: 336 // for(const UnicodeString& s(spec.get()); 337 // spec.hasFallback(); s(spec.next())) { ... 338 339 const UnicodeString& TransliteratorSpec::next() { 340 spec = nextSpec; 341 isSpecLocale = isNextLocale; 342 setupNext(); 343 return spec; 344 } 345 346 const UnicodeString& TransliteratorSpec::get() const { 347 return spec; 348 } 349 350 UBool TransliteratorSpec::isLocale() const { 351 return isSpecLocale; 352 } 353 354 ResourceBundle& TransliteratorSpec::getBundle() const { 355 return *res; 356 } 357 358 //---------------------------------------------------------------------- 359 360 #ifdef DEBUG_MEM 361 362 // Vector of Entry pointers currently in use 363 static UVector* DEBUG_entries = nullptr; 364 365 static void DEBUG_setup() { 366 if (DEBUG_entries == nullptr) { 367 UErrorCode ec = U_ZERO_ERROR; 368 DEBUG_entries = new UVector(ec); 369 } 370 } 371 372 // Caller must call DEBUG_setup first. Return index of given Entry, 373 // if it is in use (not deleted yet), or -1 if not found. 374 static int DEBUG_findEntry(TransliteratorEntry* e) { 375 for (int i=0; i<DEBUG_entries->size(); ++i) { 376 if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) { 377 return i; 378 } 379 } 380 return -1; 381 } 382 383 // Track object creation 384 static void DEBUG_newEntry(TransliteratorEntry* e) { 385 DEBUG_setup(); 386 if (DEBUG_findEntry(e) >= 0) { 387 // This should really never happen unless the heap is broken 388 printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e); 389 return; 390 } 391 UErrorCode ec = U_ZERO_ERROR; 392 DEBUG_entries->addElement(e, ec); 393 } 394 395 // Track object deletion 396 static void DEBUG_delEntry(TransliteratorEntry* e) { 397 DEBUG_setup(); 398 int i = DEBUG_findEntry(e); 399 if (i < 0) { 400 printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e); 401 return; 402 } 403 DEBUG_entries->removeElementAt(i); 404 } 405 406 // Track object usage 407 static void DEBUG_useEntry(TransliteratorEntry* e) { 408 if (e == nullptr) return; 409 DEBUG_setup(); 410 int i = DEBUG_findEntry(e); 411 if (i < 0) { 412 printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e); 413 } 414 } 415 416 #else 417 // If we're not debugging then make these macros into NOPs 418 #define DEBUG_newEntry(x) 419 #define DEBUG_delEntry(x) 420 #define DEBUG_useEntry(x) 421 #endif 422 423 //---------------------------------------------------------------------- 424 // class Entry 425 //---------------------------------------------------------------------- 426 427 /** 428 * The Entry object stores objects of different types and 429 * singleton objects as placeholders for rule-based transliterators to 430 * be built as needed. Instances of this struct can be placeholders, 431 * can represent prototype transliterators to be cloned, or can 432 * represent TransliteratorData objects. We don't support storing 433 * classes in the registry because we don't have the rtti infrastructure 434 * for it. We could easily add this if there is a need for it in the 435 * future. 436 */ 437 class TransliteratorEntry : public UMemory { 438 public: 439 enum Type { 440 RULES_FORWARD, 441 RULES_REVERSE, 442 LOCALE_RULES, 443 PROTOTYPE, 444 RBT_DATA, 445 COMPOUND_RBT, 446 ALIAS, 447 FACTORY, 448 NONE // Only used for uninitialized entries 449 } entryType; 450 // NOTE: stringArg cannot go inside the union because 451 // it has a copy constructor 452 UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT 453 int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES 454 UnicodeSet* compoundFilter; // For COMPOUND_RBT 455 union { 456 Transliterator* prototype; // For PROTOTYPE 457 TransliterationRuleData* data; // For RBT_DATA 458 UVector* dataVector; // For COMPOUND_RBT 459 struct { 460 Transliterator::Factory function; 461 Transliterator::Token context; 462 } factory; // For FACTORY 463 } u; 464 TransliteratorEntry(); 465 ~TransliteratorEntry(); 466 void adoptPrototype(Transliterator* adopted); 467 void setFactory(Transliterator::Factory factory, 468 Transliterator::Token context); 469 470 private: 471 472 TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class 473 TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class 474 }; 475 476 TransliteratorEntry::TransliteratorEntry() { 477 u.prototype = nullptr; 478 compoundFilter = nullptr; 479 entryType = NONE; 480 DEBUG_newEntry(this); 481 } 482 483 TransliteratorEntry::~TransliteratorEntry() { 484 DEBUG_delEntry(this); 485 if (entryType == PROTOTYPE) { 486 delete u.prototype; 487 } else if (entryType == RBT_DATA) { 488 // The data object is shared between instances of RBT. The 489 // entry object owns it. It should only be deleted when the 490 // transliterator component is being cleaned up. Doing so 491 // invalidates any RBTs that the user has instantiated. 492 delete u.data; 493 } else if (entryType == COMPOUND_RBT) { 494 while (u.dataVector != nullptr && !u.dataVector->isEmpty()) 495 delete static_cast<TransliterationRuleData*>(u.dataVector->orphanElementAt(0)); 496 delete u.dataVector; 497 } 498 delete compoundFilter; 499 } 500 501 void TransliteratorEntry::adoptPrototype(Transliterator* adopted) { 502 if (entryType == PROTOTYPE) { 503 delete u.prototype; 504 } 505 entryType = PROTOTYPE; 506 u.prototype = adopted; 507 } 508 509 void TransliteratorEntry::setFactory(Transliterator::Factory factory, 510 Transliterator::Token context) { 511 if (entryType == PROTOTYPE) { 512 delete u.prototype; 513 } 514 entryType = FACTORY; 515 u.factory.function = factory; 516 u.factory.context = context; 517 } 518 519 // UObjectDeleter for Hashtable::setValueDeleter 520 U_CDECL_BEGIN 521 static void U_CALLCONV 522 deleteEntry(void* obj) { 523 delete (TransliteratorEntry*) obj; 524 } 525 U_CDECL_END 526 527 //---------------------------------------------------------------------- 528 // class TransliteratorRegistry: Basic public API 529 //---------------------------------------------------------------------- 530 531 TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : 532 registry(true, status), 533 specDAG(true, SPECDAG_INIT_SIZE, status), 534 variantList(VARIANT_LIST_INIT_SIZE, status), 535 availableIDs(true, AVAILABLE_IDS_INIT_SIZE, status) 536 { 537 registry.setValueDeleter(deleteEntry); 538 variantList.setDeleter(uprv_deleteUObject); 539 variantList.setComparer(uhash_compareCaselessUnicodeString); 540 UnicodeString *emptyString = new UnicodeString(); 541 if (emptyString != nullptr) { 542 variantList.adoptElement(emptyString, status); 543 } 544 specDAG.setValueDeleter(uhash_deleteHashtable); 545 } 546 547 TransliteratorRegistry::~TransliteratorRegistry() { 548 // Through the magic of C++, everything cleans itself up 549 } 550 551 Transliterator* TransliteratorRegistry::get(const UnicodeString& ID, 552 TransliteratorAlias*& aliasReturn, 553 UErrorCode& status) { 554 U_ASSERT(aliasReturn == nullptr); 555 TransliteratorEntry *entry = find(ID); 556 return entry == nullptr ? nullptr 557 : instantiateEntry(ID, entry, aliasReturn, status); 558 } 559 560 Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, 561 TransliteratorParser& parser, 562 TransliteratorAlias*& aliasReturn, 563 UErrorCode& status) { 564 U_ASSERT(aliasReturn == nullptr); 565 TransliteratorEntry *entry = find(ID); 566 567 if (entry == nullptr) { 568 // We get to this point if there are two threads, one of which 569 // is instantiating an ID, and another of which is removing 570 // the same ID from the registry, and the timing is just right. 571 return nullptr; 572 } 573 574 // The usage model for the caller is that they will first call 575 // reg->get() inside the mutex, they'll get back an alias, they call 576 // alias->isRuleBased(), and if they get true, they call alias->parse() 577 // outside the mutex, then reg->reget() inside the mutex again. A real 578 // mess, but it gets things working for ICU 3.0. [alan]. 579 580 // Note: It's possible that in between the caller calling 581 // alias->parse() and reg->reget(), that another thread will have 582 // called reg->reget(), and the entry will already have been fixed up. 583 // We have to detect this so we don't stomp over existing entry 584 // data members and potentially leak memory (u.data and compoundFilter). 585 586 if (entry->entryType == TransliteratorEntry::RULES_FORWARD || 587 entry->entryType == TransliteratorEntry::RULES_REVERSE || 588 entry->entryType == TransliteratorEntry::LOCALE_RULES) { 589 590 if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) { 591 entry->u.data = nullptr; 592 entry->entryType = TransliteratorEntry::ALIAS; 593 entry->stringArg = UNICODE_STRING_SIMPLE("Any-nullptr"); 594 } 595 else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) { 596 entry->u.data = static_cast<TransliterationRuleData*>(parser.dataVector.orphanElementAt(0)); 597 entry->entryType = TransliteratorEntry::RBT_DATA; 598 } 599 else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) { 600 entry->stringArg = *static_cast<UnicodeString*>(parser.idBlockVector.elementAt(0)); 601 entry->compoundFilter = parser.orphanCompoundFilter(); 602 entry->entryType = TransliteratorEntry::ALIAS; 603 } 604 else { 605 entry->entryType = TransliteratorEntry::COMPOUND_RBT; 606 entry->compoundFilter = parser.orphanCompoundFilter(); 607 entry->u.dataVector = new UVector(status); 608 // TODO ICU-21701: missing check for nullptr and failed status. 609 // Unclear how best to bail out. 610 entry->stringArg.remove(); 611 612 int32_t limit = parser.idBlockVector.size(); 613 if (parser.dataVector.size() > limit) 614 limit = parser.dataVector.size(); 615 616 for (int32_t i = 0; i < limit; i++) { 617 if (i < parser.idBlockVector.size()) { 618 UnicodeString* idBlock = static_cast<UnicodeString*>(parser.idBlockVector.elementAt(i)); 619 if (!idBlock->isEmpty()) 620 entry->stringArg += *idBlock; 621 } 622 if (!parser.dataVector.isEmpty()) { 623 TransliterationRuleData* data = static_cast<TransliterationRuleData*>(parser.dataVector.orphanElementAt(0)); 624 entry->u.dataVector->addElement(data, status); 625 if (U_FAILURE(status)) { 626 delete data; 627 } 628 entry->stringArg += static_cast<char16_t>(0xffff); // use U+FFFF to mark position of RBTs in ID block 629 } 630 } 631 } 632 } 633 634 Transliterator *t = 635 instantiateEntry(ID, entry, aliasReturn, status); 636 return t; 637 } 638 639 void TransliteratorRegistry::put(Transliterator* adoptedProto, 640 UBool visible, 641 UErrorCode& ec) 642 { 643 TransliteratorEntry *entry = new TransliteratorEntry(); 644 if (entry == nullptr) { 645 ec = U_MEMORY_ALLOCATION_ERROR; 646 return; 647 } 648 entry->adoptPrototype(adoptedProto); 649 registerEntry(adoptedProto->getID(), entry, visible); 650 } 651 652 void TransliteratorRegistry::put(const UnicodeString& ID, 653 Transliterator::Factory factory, 654 Transliterator::Token context, 655 UBool visible, 656 UErrorCode& ec) { 657 TransliteratorEntry *entry = new TransliteratorEntry(); 658 if (entry == nullptr) { 659 ec = U_MEMORY_ALLOCATION_ERROR; 660 return; 661 } 662 entry->setFactory(factory, context); 663 registerEntry(ID, entry, visible); 664 } 665 666 void TransliteratorRegistry::put(const UnicodeString& ID, 667 const UnicodeString& resourceName, 668 UTransDirection dir, 669 UBool readonlyResourceAlias, 670 UBool visible, 671 UErrorCode& ec) { 672 TransliteratorEntry *entry = new TransliteratorEntry(); 673 if (entry == nullptr) { 674 ec = U_MEMORY_ALLOCATION_ERROR; 675 return; 676 } 677 entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD 678 : TransliteratorEntry::RULES_REVERSE; 679 if (readonlyResourceAlias) { 680 entry->stringArg.setTo(true, resourceName.getBuffer(), -1); 681 } 682 else { 683 entry->stringArg = resourceName; 684 } 685 registerEntry(ID, entry, visible); 686 } 687 688 void TransliteratorRegistry::put(const UnicodeString& ID, 689 const UnicodeString& alias, 690 UBool readonlyAliasAlias, 691 UBool visible, 692 UErrorCode& /*ec*/) { 693 TransliteratorEntry *entry = new TransliteratorEntry(); 694 // Null pointer check 695 if (entry != nullptr) { 696 entry->entryType = TransliteratorEntry::ALIAS; 697 if (readonlyAliasAlias) { 698 entry->stringArg.setTo(true, alias.getBuffer(), -1); 699 } 700 else { 701 entry->stringArg = alias; 702 } 703 registerEntry(ID, entry, visible); 704 } 705 } 706 707 void TransliteratorRegistry::remove(const UnicodeString& ID) { 708 UnicodeString source, target, variant; 709 UBool sawSource; 710 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 711 // Only need to do this if ID.indexOf('-') < 0 712 UnicodeString id; 713 TransliteratorIDParser::STVtoID(source, target, variant, id); 714 registry.remove(id); 715 removeSTV(source, target, variant); 716 availableIDs.remove(id); 717 } 718 719 //---------------------------------------------------------------------- 720 // class TransliteratorRegistry: Public ID and spec management 721 //---------------------------------------------------------------------- 722 723 /** 724 * == OBSOLETE - remove in ICU 3.4 == 725 * Return the number of IDs currently registered with the system. 726 * To retrieve the actual IDs, call getAvailableID(i) with 727 * i from 0 to countAvailableIDs() - 1. 728 */ 729 int32_t TransliteratorRegistry::countAvailableIDs() const { 730 return availableIDs.count(); 731 } 732 733 /** 734 * == OBSOLETE - remove in ICU 3.4 == 735 * Return the index-th available ID. index must be between 0 736 * and countAvailableIDs() - 1, inclusive. If index is out of 737 * range, the result of getAvailableID(0) is returned. 738 */ 739 const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const { 740 if (index < 0 || index >= availableIDs.count()) { 741 index = 0; 742 } 743 744 int32_t pos = UHASH_FIRST; 745 const UHashElement *e = nullptr; 746 while (index-- >= 0) { 747 e = availableIDs.nextElement(pos); 748 if (e == nullptr) { 749 break; 750 } 751 } 752 753 if (e != nullptr) { 754 return *static_cast<UnicodeString*>(e->key.pointer); 755 } 756 757 // If the code reaches here, the hash table was likely modified during iteration. 758 // Return an statically initialized empty string due to reference return type. 759 static UnicodeString empty; 760 return empty; 761 } 762 763 StringEnumeration* TransliteratorRegistry::getAvailableIDs() const { 764 return new Enumeration(*this); 765 } 766 767 int32_t TransliteratorRegistry::countAvailableSources() const { 768 return specDAG.count(); 769 } 770 771 UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index, 772 UnicodeString& result) const { 773 int32_t pos = UHASH_FIRST; 774 const UHashElement* e = nullptr; 775 while (index-- >= 0) { 776 e = specDAG.nextElement(pos); 777 if (e == nullptr) { 778 break; 779 } 780 } 781 if (e == nullptr) { 782 result.truncate(0); 783 } else { 784 result = *static_cast<UnicodeString*>(e->key.pointer); 785 } 786 return result; 787 } 788 789 int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const { 790 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 791 return (targets == nullptr) ? 0 : targets->count(); 792 } 793 794 UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index, 795 const UnicodeString& source, 796 UnicodeString& result) const { 797 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 798 if (targets == nullptr) { 799 result.truncate(0); // invalid source 800 return result; 801 } 802 int32_t pos = UHASH_FIRST; 803 const UHashElement* e = nullptr; 804 while (index-- >= 0) { 805 e = targets->nextElement(pos); 806 if (e == nullptr) { 807 break; 808 } 809 } 810 if (e == nullptr) { 811 result.truncate(0); // invalid index 812 } else { 813 result = *static_cast<UnicodeString*>(e->key.pointer); 814 } 815 return result; 816 } 817 818 int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source, 819 const UnicodeString& target) const { 820 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 821 if (targets == nullptr) { 822 return 0; 823 } 824 uint32_t varMask = targets->geti(target); 825 int32_t varCount = 0; 826 while (varMask > 0) { 827 if (varMask & 1) { 828 varCount++; 829 } 830 varMask >>= 1; 831 } 832 return varCount; 833 } 834 835 UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index, 836 const UnicodeString& source, 837 const UnicodeString& target, 838 UnicodeString& result) const { 839 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 840 if (targets == nullptr) { 841 result.truncate(0); // invalid source 842 return result; 843 } 844 uint32_t varMask = targets->geti(target); 845 int32_t varCount = 0; 846 int32_t varListIndex = 0; 847 while (varMask > 0) { 848 if (varMask & 1) { 849 if (varCount == index) { 850 UnicodeString* v = static_cast<UnicodeString*>(variantList.elementAt(varListIndex)); 851 if (v != nullptr) { 852 result = *v; 853 return result; 854 } 855 break; 856 } 857 varCount++; 858 } 859 varMask >>= 1; 860 varListIndex++; 861 } 862 result.truncate(0); // invalid target or index 863 return result; 864 } 865 866 //---------------------------------------------------------------------- 867 // class TransliteratorRegistry::Enumeration 868 //---------------------------------------------------------------------- 869 870 TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) : 871 pos(UHASH_FIRST), size(_reg.availableIDs.count()), reg(_reg) { 872 } 873 874 TransliteratorRegistry::Enumeration::~Enumeration() { 875 } 876 877 int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const { 878 return size; 879 } 880 881 const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) { 882 // This is sloppy but safe -- if we get out of sync with the underlying 883 // registry, we will still return legal strings, but they might not 884 // correspond to the snapshot at construction time. So there could be 885 // duplicate IDs or omitted IDs if insertions or deletions occur in one 886 // thread while another is iterating. To be more rigorous, add a timestamp, 887 // which is incremented with any modification, and validate this iterator 888 // against the timestamp at construction time. This probably isn't worth 889 // doing as long as there is some possibility of removing this code in favor 890 // of some new code based on Doug's service framework. 891 if (U_FAILURE(status)) { 892 return nullptr; 893 } 894 int32_t n = reg.availableIDs.count(); 895 if (n != size) { 896 status = U_ENUM_OUT_OF_SYNC_ERROR; 897 return nullptr; 898 } 899 900 const UHashElement* element = reg.availableIDs.nextElement(pos); 901 if (element == nullptr) { 902 // If the code reaches this point, it means that it's out of sync 903 // or the caller keeps asking for snext(). 904 return nullptr; 905 } 906 907 // Copy the string! This avoids lifetime problems. 908 unistr = *static_cast<const UnicodeString*>(element->key.pointer); 909 return &unistr; 910 } 911 912 void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) { 913 pos = UHASH_FIRST; 914 size = reg.availableIDs.count(); 915 } 916 917 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration) 918 919 //---------------------------------------------------------------------- 920 // class TransliteratorRegistry: internal 921 //---------------------------------------------------------------------- 922 923 /** 924 * Convenience method. Calls 6-arg registerEntry(). 925 */ 926 void TransliteratorRegistry::registerEntry(const UnicodeString& source, 927 const UnicodeString& target, 928 const UnicodeString& variant, 929 TransliteratorEntry* adopted, 930 UBool visible) { 931 UnicodeString ID; 932 UnicodeString s(source); 933 if (s.length() == 0) { 934 s.setTo(true, ANY, 3); 935 } 936 TransliteratorIDParser::STVtoID(source, target, variant, ID); 937 registerEntry(ID, s, target, variant, adopted, visible); 938 } 939 940 /** 941 * Convenience method. Calls 6-arg registerEntry(). 942 */ 943 void TransliteratorRegistry::registerEntry(const UnicodeString& ID, 944 TransliteratorEntry* adopted, 945 UBool visible) { 946 UnicodeString source, target, variant; 947 UBool sawSource; 948 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 949 // Only need to do this if ID.indexOf('-') < 0 950 UnicodeString id; 951 TransliteratorIDParser::STVtoID(source, target, variant, id); 952 registerEntry(id, source, target, variant, adopted, visible); 953 } 954 955 /** 956 * Register an entry object (adopted) with the given ID, source, 957 * target, and variant strings. 958 */ 959 void TransliteratorRegistry::registerEntry(const UnicodeString& ID, 960 const UnicodeString& source, 961 const UnicodeString& target, 962 const UnicodeString& variant, 963 TransliteratorEntry* adopted, 964 UBool visible) { 965 UErrorCode status = U_ZERO_ERROR; 966 registry.put(ID, adopted, status); 967 if (visible) { 968 registerSTV(source, target, variant); 969 if (!availableIDs.containsKey(ID)) { 970 availableIDs.puti(ID, /* unused value */ 1, status); 971 } 972 } else { 973 removeSTV(source, target, variant); 974 availableIDs.remove(ID); 975 } 976 } 977 978 /** 979 * Register a source-target/variant in the specDAG. Variant may be 980 * empty, but source and target must not be. 981 */ 982 void TransliteratorRegistry::registerSTV(const UnicodeString& source, 983 const UnicodeString& target, 984 const UnicodeString& variant) { 985 // assert(source.length() > 0); 986 // assert(target.length() > 0); 987 UErrorCode status = U_ZERO_ERROR; 988 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 989 if (targets == nullptr) { 990 int32_t size = 3; 991 if (source.compare(ANY,3) == 0) { 992 size = ANY_TARGETS_INIT_SIZE; 993 } else if (source.compare(LAT,3) == 0) { 994 size = LAT_TARGETS_INIT_SIZE; 995 } 996 targets = new Hashtable(true, size, status); 997 if (U_FAILURE(status) || targets == nullptr) { 998 return; 999 } 1000 specDAG.put(source, targets, status); 1001 } 1002 int32_t variantListIndex = variantList.indexOf((void*) &variant, 0); 1003 if (variantListIndex < 0) { 1004 if (variantList.size() >= VARIANT_LIST_MAX_SIZE) { 1005 // can't handle any more variants 1006 return; 1007 } 1008 UnicodeString *variantEntry = new UnicodeString(variant); 1009 if (variantEntry != nullptr) { 1010 variantList.adoptElement(variantEntry, status); 1011 if (U_SUCCESS(status)) { 1012 variantListIndex = variantList.size() - 1; 1013 } 1014 } 1015 if (variantListIndex < 0) { 1016 return; 1017 } 1018 } 1019 uint32_t addMask = 1 << variantListIndex; 1020 uint32_t varMask = targets->geti(target); 1021 targets->puti(target, varMask | addMask, status); 1022 } 1023 1024 /** 1025 * Remove a source-target/variant from the specDAG. 1026 */ 1027 void TransliteratorRegistry::removeSTV(const UnicodeString& source, 1028 const UnicodeString& target, 1029 const UnicodeString& variant) { 1030 // assert(source.length() > 0); 1031 // assert(target.length() > 0); 1032 UErrorCode status = U_ZERO_ERROR; 1033 Hashtable* targets = static_cast<Hashtable*>(specDAG.get(source)); 1034 if (targets == nullptr) { 1035 return; // should never happen for valid s-t/v 1036 } 1037 uint32_t varMask = targets->geti(target); 1038 if (varMask == 0) { 1039 return; // should never happen for valid s-t/v 1040 } 1041 int32_t variantListIndex = variantList.indexOf((void*) &variant, 0); 1042 if (variantListIndex < 0) { 1043 return; // should never happen for valid s-t/v 1044 } 1045 int32_t remMask = 1 << variantListIndex; 1046 varMask &= (~remMask); 1047 if (varMask != 0) { 1048 targets->puti(target, varMask, status); 1049 } else { 1050 targets->remove(target); // should delete variants 1051 if (targets->count() == 0) { 1052 specDAG.remove(source); // should delete targets 1053 } 1054 } 1055 } 1056 1057 /** 1058 * Attempt to find a source-target/variant in the dynamic registry 1059 * store. Return 0 on failure. 1060 * 1061 * Caller does NOT own returned object. 1062 */ 1063 TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src, 1064 const TransliteratorSpec& trg, 1065 const UnicodeString& variant) const { 1066 UnicodeString ID; 1067 TransliteratorIDParser::STVtoID(src, trg, variant, ID); 1068 TransliteratorEntry* e = static_cast<TransliteratorEntry*>(registry.get(ID)); 1069 DEBUG_useEntry(e); 1070 return e; 1071 } 1072 1073 /** 1074 * Attempt to find a source-target/variant in the static locale 1075 * resource store. Do not perform fallback. Return 0 on failure. 1076 * 1077 * On success, create a new entry object, register it in the dynamic 1078 * store, and return a pointer to it, but do not make it public -- 1079 * just because someone requested something, we do not expand the 1080 * available ID list (or spec DAG). 1081 * 1082 * Caller does NOT own returned object. 1083 */ 1084 TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src, 1085 const TransliteratorSpec& trg, 1086 const UnicodeString& variant) { 1087 TransliteratorEntry* entry = nullptr; 1088 if (src.isLocale()) { 1089 entry = findInBundle(src, trg, variant, UTRANS_FORWARD); 1090 } else if (trg.isLocale()) { 1091 entry = findInBundle(trg, src, variant, UTRANS_REVERSE); 1092 } 1093 1094 // If we found an entry, store it in the Hashtable for next 1095 // time. 1096 if (entry != nullptr) { 1097 registerEntry(src.getTop(), trg.getTop(), variant, entry, false); 1098 } 1099 1100 return entry; 1101 } 1102 1103 // As of 2.0, resource bundle keys cannot contain '_' 1104 static const char16_t TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo" 1105 1106 static const char16_t TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom" 1107 1108 static const char16_t TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate" 1109 1110 /** 1111 * Attempt to find an entry in a single resource bundle. This is 1112 * a one-sided lookup. findInStaticStore() performs up to two such 1113 * lookups, one for the source, and one for the target. 1114 * 1115 * Do not perform fallback. Return 0 on failure. 1116 * 1117 * On success, create a new Entry object, populate it, and return it. 1118 * The caller owns the returned object. 1119 */ 1120 TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen, 1121 const TransliteratorSpec& specToFind, 1122 const UnicodeString& variant, 1123 UTransDirection direction) 1124 { 1125 UnicodeString utag; 1126 UnicodeString resStr; 1127 int32_t pass; 1128 1129 for (pass=0; pass<2; ++pass) { 1130 utag.truncate(0); 1131 // First try either TransliteratorTo_xxx or 1132 // TransliterateFrom_xxx, then try the bidirectional 1133 // Transliterate_xxx. This precedence order is arbitrary 1134 // but must be consistent and documented. 1135 if (pass == 0) { 1136 utag.append(direction == UTRANS_FORWARD ? 1137 TRANSLITERATE_TO : TRANSLITERATE_FROM, -1); 1138 } else { 1139 utag.append(TRANSLITERATE, -1); 1140 } 1141 UnicodeString s(specToFind.get()); 1142 utag.append(s.toUpper("")); 1143 UErrorCode status = U_ZERO_ERROR; 1144 ResourceBundle subres(specToOpen.getBundle().get( 1145 CharString().appendInvariantChars(utag, status).data(), status)); 1146 if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { 1147 continue; 1148 } 1149 1150 s.truncate(0); 1151 if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) { 1152 continue; 1153 } 1154 1155 if (variant.length() != 0) { 1156 status = U_ZERO_ERROR; 1157 resStr = subres.getStringEx( 1158 CharString().appendInvariantChars(variant, status).data(), status); 1159 if (U_SUCCESS(status)) { 1160 // Exit loop successfully 1161 break; 1162 } 1163 } else { 1164 // Variant is empty, which means match the first variant listed. 1165 status = U_ZERO_ERROR; 1166 resStr = subres.getStringEx(1, status); 1167 if (U_SUCCESS(status)) { 1168 // Exit loop successfully 1169 break; 1170 } 1171 } 1172 } 1173 1174 if (pass==2) { 1175 // Failed 1176 return nullptr; 1177 } 1178 1179 // We have succeeded in loading a string from the locale 1180 // resources. Create a new registry entry to hold it and return it. 1181 TransliteratorEntry *entry = new TransliteratorEntry(); 1182 if (entry != nullptr) { 1183 // The direction is always forward for the 1184 // TransliterateTo_xxx and TransliterateFrom_xxx 1185 // items; those are unidirectional forward rules. 1186 // For the bidirectional Transliterate_xxx items, 1187 // the direction is the value passed in to this 1188 // function. 1189 int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction; 1190 entry->entryType = TransliteratorEntry::LOCALE_RULES; 1191 entry->stringArg = resStr; 1192 entry->intArg = dir; 1193 } 1194 1195 return entry; 1196 } 1197 1198 /** 1199 * Convenience method. Calls 3-arg find(). 1200 */ 1201 TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) { 1202 UnicodeString source, target, variant; 1203 UBool sawSource; 1204 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 1205 return find(source, target, variant); 1206 } 1207 1208 /** 1209 * Top-level find method. Attempt to find a source-target/variant in 1210 * either the dynamic or the static (locale resource) store. Perform 1211 * fallback. 1212 * 1213 * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v: 1214 * 1215 * ss_SS_SSS-tt_TT_TTT/v -- in hashtable 1216 * ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback) 1217 * 1218 * repeat with t = tt_TT_TTT, tt_TT, tt, and tscript 1219 * 1220 * ss_SS_SSS-t/ * 1221 * ss_SS-t/ * 1222 * ss-t/ * 1223 * sscript-t/ * 1224 * 1225 * Here * matches the first variant listed. 1226 * 1227 * Caller does NOT own returned object. Return 0 on failure. 1228 */ 1229 TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source, 1230 UnicodeString& target, 1231 UnicodeString& variant) { 1232 1233 TransliteratorSpec src(source); 1234 TransliteratorSpec trg(target); 1235 TransliteratorEntry* entry; 1236 1237 // Seek exact match in hashtable. Temporary fix for ICU 4.6. 1238 // TODO: The general logic for finding a matching transliterator needs to be reviewed. 1239 // ICU ticket #8089 1240 UnicodeString ID; 1241 TransliteratorIDParser::STVtoID(source, target, variant, ID); 1242 entry = static_cast<TransliteratorEntry*>(registry.get(ID)); 1243 if (entry != nullptr) { 1244 // std::string ss; 1245 // std::cout << ID.toUTF8String(ss) << std::endl; 1246 return entry; 1247 } 1248 1249 if (variant.length() != 0) { 1250 1251 // Seek exact match in hashtable 1252 entry = findInDynamicStore(src, trg, variant); 1253 if (entry != nullptr) { 1254 return entry; 1255 } 1256 1257 // Seek exact match in locale resources 1258 entry = findInStaticStore(src, trg, variant); 1259 if (entry != nullptr) { 1260 return entry; 1261 } 1262 } 1263 1264 for (;;) { 1265 src.reset(); 1266 for (;;) { 1267 // Seek match in hashtable 1268 entry = findInDynamicStore(src, trg, NO_VARIANT); 1269 if (entry != nullptr) { 1270 return entry; 1271 } 1272 1273 // Seek match in locale resources 1274 entry = findInStaticStore(src, trg, NO_VARIANT); 1275 if (entry != nullptr) { 1276 return entry; 1277 } 1278 if (!src.hasFallback()) { 1279 break; 1280 } 1281 src.next(); 1282 } 1283 if (!trg.hasFallback()) { 1284 break; 1285 } 1286 trg.next(); 1287 } 1288 1289 return nullptr; 1290 } 1291 1292 /** 1293 * Given an Entry object, instantiate it. Caller owns result. Return 1294 * 0 on failure. 1295 * 1296 * Return a non-empty aliasReturn value if the ID points to an alias. 1297 * We cannot instantiate it ourselves because the alias may contain 1298 * filters or compounds, which we do not understand. Caller should 1299 * make aliasReturn empty before calling. 1300 * 1301 * The entry object is assumed to reside in the dynamic store. It may be 1302 * modified. 1303 */ 1304 Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID, 1305 TransliteratorEntry *entry, 1306 TransliteratorAlias* &aliasReturn, 1307 UErrorCode& status) { 1308 Transliterator* t = nullptr; 1309 U_ASSERT(aliasReturn == 0); 1310 1311 switch (entry->entryType) { 1312 case TransliteratorEntry::RBT_DATA: 1313 t = new RuleBasedTransliterator(ID, entry->u.data); 1314 if (t == nullptr) { 1315 status = U_MEMORY_ALLOCATION_ERROR; 1316 } 1317 return t; 1318 case TransliteratorEntry::PROTOTYPE: 1319 t = entry->u.prototype->clone(); 1320 if (t == nullptr) { 1321 status = U_MEMORY_ALLOCATION_ERROR; 1322 } 1323 return t; 1324 case TransliteratorEntry::ALIAS: 1325 aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter); 1326 if (aliasReturn == nullptr) { 1327 status = U_MEMORY_ALLOCATION_ERROR; 1328 } 1329 return nullptr; 1330 case TransliteratorEntry::FACTORY: 1331 t = entry->u.factory.function(ID, entry->u.factory.context); 1332 if (t == nullptr) { 1333 status = U_MEMORY_ALLOCATION_ERROR; 1334 } 1335 return t; 1336 case TransliteratorEntry::COMPOUND_RBT: 1337 { 1338 UVector* rbts = new UVector(uprv_deleteUObject, nullptr, entry->u.dataVector->size(), status); 1339 // Check for null pointer 1340 if (rbts == nullptr) { 1341 status = U_MEMORY_ALLOCATION_ERROR; 1342 return nullptr; 1343 } 1344 int32_t passNumber = 1; 1345 for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { 1346 // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? 1347 Transliterator* tl = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), 1348 static_cast<TransliterationRuleData*>(entry->u.dataVector->elementAt(i)), false); 1349 if (tl == nullptr) 1350 status = U_MEMORY_ALLOCATION_ERROR; 1351 else 1352 rbts->adoptElement(tl, status); 1353 } 1354 if (U_FAILURE(status)) { 1355 delete rbts; 1356 return nullptr; 1357 } 1358 rbts->setDeleter(nullptr); 1359 aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); 1360 } 1361 if (aliasReturn == nullptr) { 1362 status = U_MEMORY_ALLOCATION_ERROR; 1363 } 1364 return nullptr; 1365 case TransliteratorEntry::LOCALE_RULES: 1366 aliasReturn = new TransliteratorAlias(ID, entry->stringArg, 1367 static_cast<UTransDirection>(entry->intArg)); 1368 if (aliasReturn == nullptr) { 1369 status = U_MEMORY_ALLOCATION_ERROR; 1370 } 1371 return nullptr; 1372 case TransliteratorEntry::RULES_FORWARD: 1373 case TransliteratorEntry::RULES_REVERSE: 1374 // Process the rule data into a TransliteratorRuleData object, 1375 // and possibly also into an ::id header and/or footer. Then 1376 // we modify the registry with the parsed data and retry. 1377 { 1378 TransliteratorParser parser(status); 1379 1380 // We use the file name, taken from another resource bundle 1381 // 2-d array at static init time, as a locale language. We're 1382 // just using the locale mechanism to map through to a file 1383 // name; this in no way represents an actual locale. 1384 //CharString ch(entry->stringArg); 1385 //UResourceBundle *bundle = ures_openDirect(0, ch, &status); 1386 UnicodeString rules = entry->stringArg; 1387 //ures_close(bundle); 1388 1389 //if (U_FAILURE(status)) { 1390 // We have a failure of some kind. Remove the ID from the 1391 // registry so we don't keep trying. NOTE: This will throw off 1392 // anyone who is, at the moment, trying to iterate over the 1393 // available IDs. That's acceptable since we should never 1394 // really get here except under installation, configuration, 1395 // or unrecoverable run time memory failures. 1396 // remove(ID); 1397 //} else { 1398 1399 // If the status indicates a failure, then we don't have any 1400 // rules -- there is probably an installation error. The list 1401 // in the root locale should correspond to all the installed 1402 // transliterators; if it lists something that's not 1403 // installed, we'll get an error from ResourceBundle. 1404 aliasReturn = new TransliteratorAlias(ID, rules, 1405 ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ? 1406 UTRANS_REVERSE : UTRANS_FORWARD)); 1407 if (aliasReturn == nullptr) { 1408 status = U_MEMORY_ALLOCATION_ERROR; 1409 } 1410 //} 1411 } 1412 return nullptr; 1413 default: 1414 UPRV_UNREACHABLE_EXIT; // can't get here 1415 } 1416 } 1417 U_NAMESPACE_END 1418 1419 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1420 1421 //eof