rbnf.cpp (62680B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2015, International Business Machines Corporation 6 * and others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 #include "unicode/utypes.h" 11 #include "utypeinfo.h" // for 'typeid' to work 12 13 #include "unicode/rbnf.h" 14 15 #if U_HAVE_RBNF 16 17 #include "unicode/normlzr.h" 18 #include "unicode/plurfmt.h" 19 #include "unicode/tblcoll.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ucol.h" 22 #include "unicode/uloc.h" 23 #include "unicode/unum.h" 24 #include "unicode/ures.h" 25 #include "unicode/ustring.h" 26 #include "unicode/utf16.h" 27 #include "unicode/udata.h" 28 #include "unicode/udisplaycontext.h" 29 #include "unicode/brkiter.h" 30 #include "unicode/ucasemap.h" 31 32 #include "cmemory.h" 33 #include "cstring.h" 34 #include "patternprops.h" 35 #include "uresimp.h" 36 #include "nfrs.h" 37 #include "number_decimalquantity.h" 38 39 // debugging 40 // #define RBNF_DEBUG 41 42 #ifdef RBNF_DEBUG 43 #include <stdio.h> 44 #endif 45 46 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 47 48 static const char16_t gPercentPercent[] = 49 { 50 0x25, 0x25, 0 51 }; /* "%%" */ 52 53 // All urbnf objects are created through openRules, so we init all of the 54 // Unicode string constants required by rbnf, nfrs, or nfr here. 55 static const char16_t gLenientParse[] = 56 { 57 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 58 }; /* "%%lenient-parse:" */ 59 static const char16_t gSemiColon = 0x003B; 60 static const char16_t gSemiPercent[] = 61 { 62 0x3B, 0x25, 0 63 }; /* ";%" */ 64 65 #define kSomeNumberOfBitsDiv2 22 66 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 67 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 68 69 U_NAMESPACE_BEGIN 70 71 using number::impl::DecimalQuantity; 72 73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 74 75 /* 76 This is a utility class. It does not use ICU's RTTI. 77 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 78 Please make sure that intltest passes on Windows in Release mode, 79 since the string pooling per compilation unit will mess up how RTTI works. 80 The RTTI code was also removed due to lack of code coverage. 81 */ 82 class LocalizationInfo : public UMemory { 83 protected: 84 virtual ~LocalizationInfo(); 85 uint32_t refcount; 86 87 public: 88 LocalizationInfo() : refcount(0) {} 89 90 LocalizationInfo* ref() { 91 ++refcount; 92 return this; 93 } 94 95 LocalizationInfo* unref() { 96 if (refcount && --refcount == 0) { 97 delete this; 98 } 99 return nullptr; 100 } 101 102 virtual bool operator==(const LocalizationInfo* rhs) const; 103 inline bool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 104 105 virtual int32_t getNumberOfRuleSets() const = 0; 106 virtual const char16_t* getRuleSetName(int32_t index) const = 0; 107 virtual int32_t getNumberOfDisplayLocales() const = 0; 108 virtual const char16_t* getLocaleName(int32_t index) const = 0; 109 virtual const char16_t* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 110 111 virtual int32_t indexForLocale(const char16_t* locale) const; 112 virtual int32_t indexForRuleSet(const char16_t* ruleset) const; 113 114 // virtual UClassID getDynamicClassID() const = 0; 115 // static UClassID getStaticClassID(); 116 }; 117 118 LocalizationInfo::~LocalizationInfo() {} 119 120 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 121 122 // if both strings are nullptr, this returns true 123 static UBool 124 streq(const char16_t* lhs, const char16_t* rhs) { 125 if (rhs == lhs) { 126 return true; 127 } 128 if (lhs && rhs) { 129 return u_strcmp(lhs, rhs) == 0; 130 } 131 return false; 132 } 133 134 bool 135 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 136 if (rhs) { 137 if (this == rhs) { 138 return true; 139 } 140 141 int32_t rsc = getNumberOfRuleSets(); 142 if (rsc == rhs->getNumberOfRuleSets()) { 143 for (int i = 0; i < rsc; ++i) { 144 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 145 return false; 146 } 147 } 148 int32_t dlc = getNumberOfDisplayLocales(); 149 if (dlc == rhs->getNumberOfDisplayLocales()) { 150 for (int i = 0; i < dlc; ++i) { 151 const char16_t* locale = getLocaleName(i); 152 int32_t ix = rhs->indexForLocale(locale); 153 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 154 if (!streq(locale, rhs->getLocaleName(ix))) { 155 return false; 156 } 157 for (int j = 0; j < rsc; ++j) { 158 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 159 return false; 160 } 161 } 162 } 163 return true; 164 } 165 } 166 } 167 return false; 168 } 169 170 int32_t 171 LocalizationInfo::indexForLocale(const char16_t* locale) const { 172 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 173 if (streq(locale, getLocaleName(i))) { 174 return i; 175 } 176 } 177 return -1; 178 } 179 180 int32_t 181 LocalizationInfo::indexForRuleSet(const char16_t* ruleset) const { 182 if (ruleset) { 183 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 184 if (streq(ruleset, getRuleSetName(i))) { 185 return i; 186 } 187 } 188 } 189 return -1; 190 } 191 192 193 typedef void (*Fn_Deleter)(void*); 194 195 class VArray { 196 void** buf; 197 int32_t cap; 198 int32_t size; 199 Fn_Deleter deleter; 200 public: 201 VArray() : buf(nullptr), cap(0), size(0), deleter(nullptr) {} 202 203 VArray(Fn_Deleter del) : buf(nullptr), cap(0), size(0), deleter(del) {} 204 205 ~VArray() { 206 if (deleter) { 207 for (int i = 0; i < size; ++i) { 208 (*deleter)(buf[i]); 209 } 210 } 211 uprv_free(buf); 212 } 213 214 int32_t length() { 215 return size; 216 } 217 218 void add(void* elem, UErrorCode& status) { 219 if (U_SUCCESS(status)) { 220 if (size == cap) { 221 if (cap == 0) { 222 cap = 1; 223 } else if (cap < 256) { 224 cap *= 2; 225 } else { 226 cap += 256; 227 } 228 if (buf == nullptr) { 229 buf = static_cast<void**>(uprv_malloc(cap * sizeof(void*))); 230 } else { 231 buf = static_cast<void**>(uprv_realloc(buf, cap * sizeof(void*))); 232 } 233 if (buf == nullptr) { 234 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 235 status = U_MEMORY_ALLOCATION_ERROR; 236 return; 237 } 238 void* start = &buf[size]; 239 size_t count = (cap - size) * sizeof(void*); 240 uprv_memset(start, 0, count); // fill with nulls, just because 241 } 242 buf[size++] = elem; 243 } 244 } 245 246 void** release() { 247 void** result = buf; 248 buf = nullptr; 249 cap = 0; 250 size = 0; 251 return result; 252 } 253 }; 254 255 class LocDataParser; 256 257 class StringLocalizationInfo : public LocalizationInfo { 258 char16_t* info; 259 char16_t*** data; 260 int32_t numRuleSets; 261 int32_t numLocales; 262 263 friend class LocDataParser; 264 265 StringLocalizationInfo(char16_t* i, char16_t*** d, int32_t numRS, int32_t numLocs) 266 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 267 { 268 } 269 270 public: 271 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 272 273 virtual ~StringLocalizationInfo(); 274 virtual int32_t getNumberOfRuleSets() const override { return numRuleSets; } 275 virtual const char16_t* getRuleSetName(int32_t index) const override; 276 virtual int32_t getNumberOfDisplayLocales() const override { return numLocales; } 277 virtual const char16_t* getLocaleName(int32_t index) const override; 278 virtual const char16_t* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const override; 279 280 // virtual UClassID getDynamicClassID() const; 281 // static UClassID getStaticClassID(); 282 283 private: 284 void init(UErrorCode& status) const; 285 }; 286 287 288 enum { 289 OPEN_ANGLE = 0x003c, /* '<' */ 290 CLOSE_ANGLE = 0x003e, /* '>' */ 291 COMMA = 0x002c, 292 TICK = 0x0027, 293 QUOTE = 0x0022, 294 SPACE = 0x0020 295 }; 296 297 /** 298 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 299 */ 300 class LocDataParser { 301 char16_t* data; 302 const char16_t* e; 303 char16_t* p; 304 char16_t ch; 305 UParseError& pe; 306 UErrorCode& ec; 307 308 public: 309 LocDataParser(UParseError& parseError, UErrorCode& status) 310 : data(nullptr), e(nullptr), p(nullptr), ch(0xffff), pe(parseError), ec(status) {} 311 ~LocDataParser() {} 312 313 /* 314 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 315 * and return nullptr. The StringLocalizationInfo will adopt locData if it is created. 316 */ 317 StringLocalizationInfo* parse(char16_t* data, int32_t len); 318 319 private: 320 321 inline void inc() { 322 ++p; 323 ch = 0xffff; 324 } 325 inline UBool checkInc(char16_t c) { 326 if (p < e && (ch == c || *p == c)) { 327 inc(); 328 return true; 329 } 330 return false; 331 } 332 inline UBool check(char16_t c) { 333 return p < e && (ch == c || *p == c); 334 } 335 inline void skipWhitespace() { 336 while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) { 337 inc(); 338 } 339 } 340 inline UBool inList(char16_t c, const char16_t* list) const { 341 if (*list == SPACE && PatternProps::isWhiteSpace(c)) { 342 return true; 343 } 344 while (*list && *list != c) { 345 ++list; 346 } 347 return *list == c; 348 } 349 void parseError(const char* msg); 350 351 StringLocalizationInfo* doParse(); 352 353 char16_t** nextArray(int32_t& requiredLength); 354 char16_t* nextString(); 355 }; 356 357 #ifdef RBNF_DEBUG 358 #define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \ 359 parseError(msg); \ 360 return nullptr; \ 361 } UPRV_BLOCK_MACRO_END 362 #define EXPLANATION_ARG explanationArg 363 #else 364 #define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \ 365 parseError(nullptr); \ 366 return nullptr; \ 367 } UPRV_BLOCK_MACRO_END 368 #define EXPLANATION_ARG 369 #endif 370 371 372 static const char16_t DQUOTE_STOPLIST[] = { 373 QUOTE, 0 374 }; 375 376 static const char16_t SQUOTE_STOPLIST[] = { 377 TICK, 0 378 }; 379 380 static const char16_t NOQUOTE_STOPLIST[] = { 381 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 382 }; 383 384 static void 385 DeleteFn(void* p) { 386 uprv_free(p); 387 } 388 389 StringLocalizationInfo* 390 LocDataParser::parse(char16_t* _data, int32_t len) { 391 if (U_FAILURE(ec)) { 392 if (_data) uprv_free(_data); 393 return nullptr; 394 } 395 396 pe.line = 0; 397 pe.offset = -1; 398 pe.postContext[0] = 0; 399 pe.preContext[0] = 0; 400 401 if (_data == nullptr) { 402 ec = U_ILLEGAL_ARGUMENT_ERROR; 403 return nullptr; 404 } 405 406 if (len <= 0) { 407 ec = U_ILLEGAL_ARGUMENT_ERROR; 408 uprv_free(_data); 409 return nullptr; 410 } 411 412 data = _data; 413 e = data + len; 414 p = _data; 415 ch = 0xffff; 416 417 return doParse(); 418 } 419 420 421 StringLocalizationInfo* 422 LocDataParser::doParse() { 423 skipWhitespace(); 424 if (!checkInc(OPEN_ANGLE)) { 425 ERROR("Missing open angle"); 426 } else { 427 VArray array(DeleteFn); 428 UBool mightHaveNext = true; 429 int32_t requiredLength = -1; 430 while (mightHaveNext) { 431 mightHaveNext = false; 432 char16_t** elem = nextArray(requiredLength); 433 skipWhitespace(); 434 UBool haveComma = check(COMMA); 435 if (elem) { 436 array.add(elem, ec); 437 if (haveComma) { 438 inc(); 439 mightHaveNext = true; 440 } 441 } else if (haveComma) { 442 ERROR("Unexpected character"); 443 } 444 } 445 446 skipWhitespace(); 447 if (!checkInc(CLOSE_ANGLE)) { 448 if (check(OPEN_ANGLE)) { 449 ERROR("Missing comma in outer array"); 450 } else { 451 ERROR("Missing close angle bracket in outer array"); 452 } 453 } 454 455 skipWhitespace(); 456 if (p != e) { 457 ERROR("Extra text after close of localization data"); 458 } 459 460 array.add(nullptr, ec); 461 if (U_SUCCESS(ec)) { 462 int32_t numLocs = array.length() - 2; // subtract first, nullptr 463 char16_t*** result = reinterpret_cast<char16_t***>(array.release()); 464 465 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, nullptr 466 } 467 } 468 469 ERROR("Unknown error"); 470 } 471 472 char16_t** 473 LocDataParser::nextArray(int32_t& requiredLength) { 474 if (U_FAILURE(ec)) { 475 return nullptr; 476 } 477 478 skipWhitespace(); 479 if (!checkInc(OPEN_ANGLE)) { 480 ERROR("Missing open angle"); 481 } 482 483 VArray array; 484 UBool mightHaveNext = true; 485 while (mightHaveNext) { 486 mightHaveNext = false; 487 char16_t* elem = nextString(); 488 skipWhitespace(); 489 UBool haveComma = check(COMMA); 490 if (elem) { 491 array.add(elem, ec); 492 if (haveComma) { 493 inc(); 494 mightHaveNext = true; 495 } 496 } else if (haveComma) { 497 ERROR("Unexpected comma"); 498 } 499 } 500 skipWhitespace(); 501 if (!checkInc(CLOSE_ANGLE)) { 502 if (check(OPEN_ANGLE)) { 503 ERROR("Missing close angle bracket in inner array"); 504 } else { 505 ERROR("Missing comma in inner array"); 506 } 507 } 508 509 array.add(nullptr, ec); 510 if (U_SUCCESS(ec)) { 511 if (requiredLength == -1) { 512 requiredLength = array.length() + 1; 513 } else if (array.length() != requiredLength) { 514 ec = U_ILLEGAL_ARGUMENT_ERROR; 515 ERROR("Array not of required length"); 516 } 517 518 return reinterpret_cast<char16_t**>(array.release()); 519 } 520 ERROR("Unknown Error"); 521 } 522 523 char16_t* 524 LocDataParser::nextString() { 525 char16_t* result = nullptr; 526 527 skipWhitespace(); 528 if (p < e) { 529 const char16_t* terminators; 530 char16_t c = *p; 531 UBool haveQuote = c == QUOTE || c == TICK; 532 if (haveQuote) { 533 inc(); 534 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 535 } else { 536 terminators = NOQUOTE_STOPLIST; 537 } 538 char16_t* start = p; 539 while (p < e && !inList(*p, terminators)) ++p; 540 if (p == e) { 541 ERROR("Unexpected end of data"); 542 } 543 544 char16_t x = *p; 545 if (p > start) { 546 ch = x; 547 *p = 0x0; // terminate by writing to data 548 result = start; // just point into data 549 } 550 if (haveQuote) { 551 if (x != c) { 552 ERROR("Missing matching quote"); 553 } else if (p == start) { 554 ERROR("Empty string"); 555 } 556 inc(); 557 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 558 ERROR("Unexpected character in string"); 559 } 560 } 561 562 // ok for there to be no next string 563 return result; 564 } 565 566 void LocDataParser::parseError(const char* EXPLANATION_ARG) 567 { 568 if (!data) { 569 return; 570 } 571 572 const char16_t* start = p - U_PARSE_CONTEXT_LEN - 1; 573 if (start < data) { 574 start = data; 575 } 576 for (char16_t* x = p; --x >= start;) { 577 if (!*x) { 578 start = x+1; 579 break; 580 } 581 } 582 const char16_t* limit = p + U_PARSE_CONTEXT_LEN - 1; 583 if (limit > e) { 584 limit = e; 585 } 586 u_strncpy(pe.preContext, start, static_cast<int32_t>(p - start)); 587 pe.preContext[p-start] = 0; 588 u_strncpy(pe.postContext, p, static_cast<int32_t>(limit - p)); 589 pe.postContext[limit-p] = 0; 590 pe.offset = static_cast<int32_t>(p - data); 591 592 #ifdef RBNF_DEBUG 593 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 594 595 UnicodeString msg; 596 msg.append(start, p - start); 597 msg.append((char16_t)0x002f); /* SOLIDUS/SLASH */ 598 msg.append(p, limit-p); 599 msg.append(UNICODE_STRING_SIMPLE("'")); 600 601 char buf[128]; 602 int32_t len = msg.extract(0, msg.length(), buf, 128); 603 if (len >= 128) { 604 buf[127] = 0; 605 } else { 606 buf[len] = 0; 607 } 608 fprintf(stderr, "%s\n", buf); 609 fflush(stderr); 610 #endif 611 612 uprv_free(data); 613 data = nullptr; 614 p = nullptr; 615 e = nullptr; 616 617 if (U_SUCCESS(ec)) { 618 ec = U_PARSE_ERROR; 619 } 620 } 621 622 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 623 624 StringLocalizationInfo* 625 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 626 if (U_FAILURE(status)) { 627 return nullptr; 628 } 629 630 int32_t len = info.length(); 631 if (len == 0) { 632 return nullptr; // no error; 633 } 634 635 char16_t* p = static_cast<char16_t*>(uprv_malloc(len * sizeof(char16_t))); 636 if (!p) { 637 status = U_MEMORY_ALLOCATION_ERROR; 638 return nullptr; 639 } 640 info.extract(p, len, status); 641 if (!U_FAILURE(status)) { 642 status = U_ZERO_ERROR; // clear warning about non-termination 643 } 644 645 LocDataParser parser(perror, status); 646 return parser.parse(p, len); 647 } 648 649 StringLocalizationInfo::~StringLocalizationInfo() { 650 for (char16_t*** p = data; *p; ++p) { 651 // remaining data is simply pointer into our unicode string data. 652 if (*p) uprv_free(*p); 653 } 654 if (data) uprv_free(data); 655 if (info) uprv_free(info); 656 } 657 658 659 const char16_t* 660 StringLocalizationInfo::getRuleSetName(int32_t index) const { 661 if (index >= 0 && index < getNumberOfRuleSets()) { 662 return data[0][index]; 663 } 664 return nullptr; 665 } 666 667 const char16_t* 668 StringLocalizationInfo::getLocaleName(int32_t index) const { 669 if (index >= 0 && index < getNumberOfDisplayLocales()) { 670 return data[index+1][0]; 671 } 672 return nullptr; 673 } 674 675 const char16_t* 676 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 677 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 678 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 679 return data[localeIndex+1][ruleIndex+1]; 680 } 681 return nullptr; 682 } 683 684 // ---------- 685 686 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 687 const UnicodeString& locs, 688 const Locale& alocale, UParseError& perror, UErrorCode& status) 689 : fRuleSets(nullptr) 690 , ruleSetDescriptions(nullptr) 691 , numRuleSets(0) 692 , defaultRuleSet(nullptr) 693 , locale(alocale) 694 , collator(nullptr) 695 , decimalFormatSymbols(nullptr) 696 , defaultInfinityRule(nullptr) 697 , defaultNaNRule(nullptr) 698 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 699 , lenient(false) 700 , lenientParseRules(nullptr) 701 , localizations(nullptr) 702 , capitalizationInfoSet(false) 703 , capitalizationForUIListMenu(false) 704 , capitalizationForStandAlone(false) 705 , capitalizationBrkIter(nullptr) 706 { 707 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 708 init(description, locinfo, perror, status); 709 } 710 711 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 712 const UnicodeString& locs, 713 UParseError& perror, UErrorCode& status) 714 : fRuleSets(nullptr) 715 , ruleSetDescriptions(nullptr) 716 , numRuleSets(0) 717 , defaultRuleSet(nullptr) 718 , locale(Locale::getDefault()) 719 , collator(nullptr) 720 , decimalFormatSymbols(nullptr) 721 , defaultInfinityRule(nullptr) 722 , defaultNaNRule(nullptr) 723 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 724 , lenient(false) 725 , lenientParseRules(nullptr) 726 , localizations(nullptr) 727 , capitalizationInfoSet(false) 728 , capitalizationForUIListMenu(false) 729 , capitalizationForStandAlone(false) 730 , capitalizationBrkIter(nullptr) 731 { 732 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 733 init(description, locinfo, perror, status); 734 } 735 736 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 737 LocalizationInfo* info, 738 const Locale& alocale, UParseError& perror, UErrorCode& status) 739 : fRuleSets(nullptr) 740 , ruleSetDescriptions(nullptr) 741 , numRuleSets(0) 742 , defaultRuleSet(nullptr) 743 , locale(alocale) 744 , collator(nullptr) 745 , decimalFormatSymbols(nullptr) 746 , defaultInfinityRule(nullptr) 747 , defaultNaNRule(nullptr) 748 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 749 , lenient(false) 750 , lenientParseRules(nullptr) 751 , localizations(nullptr) 752 , capitalizationInfoSet(false) 753 , capitalizationForUIListMenu(false) 754 , capitalizationForStandAlone(false) 755 , capitalizationBrkIter(nullptr) 756 { 757 init(description, info, perror, status); 758 } 759 760 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 761 UParseError& perror, 762 UErrorCode& status) 763 : fRuleSets(nullptr) 764 , ruleSetDescriptions(nullptr) 765 , numRuleSets(0) 766 , defaultRuleSet(nullptr) 767 , locale(Locale::getDefault()) 768 , collator(nullptr) 769 , decimalFormatSymbols(nullptr) 770 , defaultInfinityRule(nullptr) 771 , defaultNaNRule(nullptr) 772 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 773 , lenient(false) 774 , lenientParseRules(nullptr) 775 , localizations(nullptr) 776 , capitalizationInfoSet(false) 777 , capitalizationForUIListMenu(false) 778 , capitalizationForStandAlone(false) 779 , capitalizationBrkIter(nullptr) 780 { 781 init(description, nullptr, perror, status); 782 } 783 784 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 785 const Locale& aLocale, 786 UParseError& perror, 787 UErrorCode& status) 788 : fRuleSets(nullptr) 789 , ruleSetDescriptions(nullptr) 790 , numRuleSets(0) 791 , defaultRuleSet(nullptr) 792 , locale(aLocale) 793 , collator(nullptr) 794 , decimalFormatSymbols(nullptr) 795 , defaultInfinityRule(nullptr) 796 , defaultNaNRule(nullptr) 797 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 798 , lenient(false) 799 , lenientParseRules(nullptr) 800 , localizations(nullptr) 801 , capitalizationInfoSet(false) 802 , capitalizationForUIListMenu(false) 803 , capitalizationForStandAlone(false) 804 , capitalizationBrkIter(nullptr) 805 { 806 init(description, nullptr, perror, status); 807 } 808 809 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 810 : fRuleSets(nullptr) 811 , ruleSetDescriptions(nullptr) 812 , numRuleSets(0) 813 , defaultRuleSet(nullptr) 814 , locale(alocale) 815 , collator(nullptr) 816 , decimalFormatSymbols(nullptr) 817 , defaultInfinityRule(nullptr) 818 , defaultNaNRule(nullptr) 819 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 820 , lenient(false) 821 , lenientParseRules(nullptr) 822 , localizations(nullptr) 823 , capitalizationInfoSet(false) 824 , capitalizationForUIListMenu(false) 825 , capitalizationForStandAlone(false) 826 , capitalizationBrkIter(nullptr) 827 { 828 if (U_FAILURE(status)) { 829 return; 830 } 831 832 const char* rules_tag = "RBNFRules"; 833 const char* fmt_tag = ""; 834 switch (tag) { 835 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 836 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 837 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 838 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 839 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 840 } 841 842 // TODO: read localization info from resource 843 LocalizationInfo* locinfo = nullptr; 844 845 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 846 if (U_SUCCESS(status)) { 847 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 848 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 849 850 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, nullptr, &status); 851 if (U_FAILURE(status)) { 852 ures_close(nfrb); 853 } 854 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, nullptr, &status); 855 if (U_FAILURE(status)) { 856 ures_close(rbnfRules); 857 ures_close(nfrb); 858 return; 859 } 860 861 UnicodeString desc; 862 while (ures_hasNext(ruleSets)) { 863 desc.append(ures_getNextUnicodeString(ruleSets,nullptr,&status)); 864 } 865 UParseError perror; 866 867 init(desc, locinfo, perror, status); 868 869 ures_close(ruleSets); 870 ures_close(rbnfRules); 871 } 872 ures_close(nfrb); 873 } 874 875 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 876 : NumberFormat(rhs) 877 , fRuleSets(nullptr) 878 , ruleSetDescriptions(nullptr) 879 , numRuleSets(0) 880 , defaultRuleSet(nullptr) 881 , locale(rhs.locale) 882 , collator(nullptr) 883 , decimalFormatSymbols(nullptr) 884 , defaultInfinityRule(nullptr) 885 , defaultNaNRule(nullptr) 886 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 887 , lenient(false) 888 , lenientParseRules(nullptr) 889 , localizations(nullptr) 890 , capitalizationInfoSet(false) 891 , capitalizationForUIListMenu(false) 892 , capitalizationForStandAlone(false) 893 , capitalizationBrkIter(nullptr) 894 { 895 this->operator=(rhs); 896 } 897 898 // -------- 899 900 RuleBasedNumberFormat& 901 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 902 { 903 if (this == &rhs) { 904 return *this; 905 } 906 NumberFormat::operator=(rhs); 907 UErrorCode status = U_ZERO_ERROR; 908 dispose(); 909 locale = rhs.locale; 910 lenient = rhs.lenient; 911 912 UParseError perror; 913 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 914 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : nullptr, perror, status); 915 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 916 setRoundingMode(rhs.getRoundingMode()); 917 918 capitalizationInfoSet = rhs.capitalizationInfoSet; 919 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 920 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 921 #if !UCONFIG_NO_BREAK_ITERATION 922 capitalizationBrkIter = (rhs.capitalizationBrkIter!=nullptr)? rhs.capitalizationBrkIter->clone(): nullptr; 923 #endif 924 925 return *this; 926 } 927 928 RuleBasedNumberFormat::~RuleBasedNumberFormat() 929 { 930 dispose(); 931 } 932 933 RuleBasedNumberFormat* 934 RuleBasedNumberFormat::clone() const 935 { 936 return new RuleBasedNumberFormat(*this); 937 } 938 939 bool 940 RuleBasedNumberFormat::operator==(const Format& other) const 941 { 942 if (this == &other) { 943 return true; 944 } 945 946 if (typeid(*this) == typeid(other)) { 947 const RuleBasedNumberFormat& rhs = static_cast<const RuleBasedNumberFormat&>(other); 948 // test for capitalization info equality is adequately handled 949 // by the NumberFormat test for fCapitalizationContext equality; 950 // the info here is just derived from that. 951 if (locale == rhs.locale && 952 lenient == rhs.lenient && 953 (localizations == nullptr 954 ? rhs.localizations == nullptr 955 : (rhs.localizations == nullptr 956 ? false 957 : *localizations == rhs.localizations))) { 958 959 NFRuleSet** p = fRuleSets; 960 NFRuleSet** q = rhs.fRuleSets; 961 if (p == nullptr) { 962 return q == nullptr; 963 } else if (q == nullptr) { 964 return false; 965 } 966 while (*p && *q && (**p == **q)) { 967 ++p; 968 ++q; 969 } 970 return *q == nullptr && *p == nullptr; 971 } 972 } 973 974 return false; 975 } 976 977 UnicodeString 978 RuleBasedNumberFormat::getRules() const 979 { 980 UnicodeString result; 981 if (fRuleSets != nullptr) { 982 for (NFRuleSet** p = fRuleSets; *p; ++p) { 983 (*p)->appendRules(result); 984 } 985 } 986 return result; 987 } 988 989 UnicodeString 990 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 991 { 992 if (localizations) { 993 UnicodeString string(true, localizations->getRuleSetName(index), static_cast<int32_t>(-1)); 994 return string; 995 } 996 else if (fRuleSets) { 997 UnicodeString result; 998 for (NFRuleSet** p = fRuleSets; *p; ++p) { 999 NFRuleSet* rs = *p; 1000 if (rs->isPublic()) { 1001 if (--index == -1) { 1002 rs->getName(result); 1003 return result; 1004 } 1005 } 1006 } 1007 } 1008 UnicodeString empty; 1009 return empty; 1010 } 1011 1012 int32_t 1013 RuleBasedNumberFormat::getNumberOfRuleSetNames() const 1014 { 1015 int32_t result = 0; 1016 if (localizations) { 1017 result = localizations->getNumberOfRuleSets(); 1018 } 1019 else if (fRuleSets) { 1020 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1021 if ((**p).isPublic()) { 1022 ++result; 1023 } 1024 } 1025 } 1026 return result; 1027 } 1028 1029 int32_t 1030 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales() const { 1031 if (localizations) { 1032 return localizations->getNumberOfDisplayLocales(); 1033 } 1034 return 0; 1035 } 1036 1037 Locale 1038 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 1039 if (U_FAILURE(status)) { 1040 return {""}; 1041 } 1042 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 1043 UnicodeString name(true, localizations->getLocaleName(index), -1); 1044 char buffer[64]; 1045 int32_t cap = name.length() + 1; 1046 char* bp = buffer; 1047 if (cap > 64) { 1048 bp = static_cast<char*>(uprv_malloc(cap)); 1049 if (bp == nullptr) { 1050 status = U_MEMORY_ALLOCATION_ERROR; 1051 return {""}; 1052 } 1053 } 1054 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 1055 Locale retLocale(bp); 1056 if (bp != buffer) { 1057 uprv_free(bp); 1058 } 1059 return retLocale; 1060 } 1061 status = U_ILLEGAL_ARGUMENT_ERROR; 1062 Locale retLocale; 1063 return retLocale; 1064 } 1065 1066 UnicodeString 1067 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1068 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1069 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1070 int32_t len = localeName.length(); 1071 char16_t* localeStr = localeName.getBuffer(len + 1); 1072 while (len >= 0) { 1073 localeStr[len] = 0; 1074 int32_t ix = localizations->indexForLocale(localeStr); 1075 if (ix >= 0) { 1076 UnicodeString name(true, localizations->getDisplayName(ix, index), -1); 1077 return name; 1078 } 1079 1080 // trim trailing portion, skipping over omitted sections 1081 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1082 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1083 } 1084 UnicodeString name(true, localizations->getRuleSetName(index), -1); 1085 return name; 1086 } 1087 UnicodeString bogus; 1088 bogus.setToBogus(); 1089 return bogus; 1090 } 1091 1092 UnicodeString 1093 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1094 if (localizations) { 1095 UnicodeString rsn(ruleSetName); 1096 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1097 return getRuleSetDisplayName(ix, localeParam); 1098 } 1099 UnicodeString bogus; 1100 bogus.setToBogus(); 1101 return bogus; 1102 } 1103 1104 NFRuleSet* 1105 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1106 { 1107 if (U_SUCCESS(status) && fRuleSets) { 1108 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1109 NFRuleSet* rs = *p; 1110 if (rs->isNamed(name)) { 1111 return rs; 1112 } 1113 } 1114 status = U_ILLEGAL_ARGUMENT_ERROR; 1115 } 1116 return nullptr; 1117 } 1118 1119 UnicodeString& 1120 RuleBasedNumberFormat::format(const DecimalQuantity &number, 1121 UnicodeString& appendTo, 1122 FieldPosition& pos, 1123 UErrorCode &status) const { 1124 if (U_FAILURE(status)) { 1125 return appendTo; 1126 } 1127 DecimalQuantity copy(number); 1128 if (copy.fitsInLong()) { 1129 format(number.toLong(), appendTo, pos, status); 1130 } 1131 else { 1132 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status); 1133 if (copy.fitsInLong()) { 1134 format(number.toDouble(), appendTo, pos, status); 1135 } 1136 else { 1137 // We're outside of our normal range that this framework can handle. 1138 // The DecimalFormat will provide more accurate results. 1139 1140 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1141 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status); 1142 if (decimalFormat.isNull()) { 1143 return appendTo; 1144 } 1145 Formattable f; 1146 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status); 1147 if (decimalQuantity.isNull()) { 1148 return appendTo; 1149 } 1150 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity. 1151 decimalFormat->format(f, appendTo, pos, status); 1152 } 1153 } 1154 return appendTo; 1155 } 1156 1157 UnicodeString& 1158 RuleBasedNumberFormat::format(int32_t number, 1159 UnicodeString& toAppendTo, 1160 FieldPosition& pos) const 1161 { 1162 return format(static_cast<int64_t>(number), toAppendTo, pos); 1163 } 1164 1165 1166 UnicodeString& 1167 RuleBasedNumberFormat::format(int64_t number, 1168 UnicodeString& toAppendTo, 1169 FieldPosition& /* pos */) const 1170 { 1171 if (defaultRuleSet) { 1172 UErrorCode status = U_ZERO_ERROR; 1173 format(number, defaultRuleSet, toAppendTo, status); 1174 } 1175 return toAppendTo; 1176 } 1177 1178 1179 UnicodeString& 1180 RuleBasedNumberFormat::format(double number, 1181 UnicodeString& toAppendTo, 1182 FieldPosition& /* pos */) const 1183 { 1184 UErrorCode status = U_ZERO_ERROR; 1185 if (defaultRuleSet) { 1186 format(number, *defaultRuleSet, toAppendTo, status); 1187 } 1188 return toAppendTo; 1189 } 1190 1191 1192 UnicodeString& 1193 RuleBasedNumberFormat::format(int32_t number, 1194 const UnicodeString& ruleSetName, 1195 UnicodeString& toAppendTo, 1196 FieldPosition& pos, 1197 UErrorCode& status) const 1198 { 1199 return format(static_cast<int64_t>(number), ruleSetName, toAppendTo, pos, status); 1200 } 1201 1202 1203 UnicodeString& 1204 RuleBasedNumberFormat::format(int64_t number, 1205 const UnicodeString& ruleSetName, 1206 UnicodeString& toAppendTo, 1207 FieldPosition& /* pos */, 1208 UErrorCode& status) const 1209 { 1210 if (U_SUCCESS(status)) { 1211 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1212 // throw new IllegalArgumentException("Can't use internal rule set"); 1213 status = U_ILLEGAL_ARGUMENT_ERROR; 1214 } else { 1215 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1216 if (rs) { 1217 format(number, rs, toAppendTo, status); 1218 } 1219 } 1220 } 1221 return toAppendTo; 1222 } 1223 1224 1225 UnicodeString& 1226 RuleBasedNumberFormat::format(double number, 1227 const UnicodeString& ruleSetName, 1228 UnicodeString& toAppendTo, 1229 FieldPosition& /* pos */, 1230 UErrorCode& status) const 1231 { 1232 if (U_SUCCESS(status)) { 1233 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1234 // throw new IllegalArgumentException("Can't use internal rule set"); 1235 status = U_ILLEGAL_ARGUMENT_ERROR; 1236 } else { 1237 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1238 if (rs) { 1239 format(number, *rs, toAppendTo, status); 1240 } 1241 } 1242 } 1243 return toAppendTo; 1244 } 1245 1246 void 1247 RuleBasedNumberFormat::format(double number, 1248 NFRuleSet& rs, 1249 UnicodeString& toAppendTo, 1250 UErrorCode& status) const 1251 { 1252 int32_t startPos = toAppendTo.length(); 1253 if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) { 1254 DecimalQuantity digitList; 1255 digitList.setToDouble(number); 1256 digitList.roundToMagnitude( 1257 -getMaximumFractionDigits(), 1258 static_cast<UNumberFormatRoundingMode>(getRoundingMode()), 1259 status); 1260 number = digitList.toDouble(); 1261 } 1262 rs.format(number, toAppendTo, toAppendTo.length(), 0, status); 1263 adjustForCapitalizationContext(startPos, toAppendTo, status); 1264 } 1265 1266 /** 1267 * Bottleneck through which all the public format() methods 1268 * that take a long pass. By the time we get here, we know 1269 * which rule set we're using to do the formatting. 1270 * @param number The number to format 1271 * @param ruleSet The rule set to use to format the number 1272 * @return The text that resulted from formatting the number 1273 */ 1274 UnicodeString& 1275 RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const 1276 { 1277 // all API format() routines that take a double vector through 1278 // here. We have these two identical functions-- one taking a 1279 // double and one taking a long-- the couple digits of precision 1280 // that long has but double doesn't (both types are 8 bytes long, 1281 // but double has to borrow some of the mantissa bits to hold 1282 // the exponent). 1283 // Create an empty string buffer where the result will 1284 // be built, and pass it to the rule set (along with an insertion 1285 // position of 0 and the number being formatted) to the rule set 1286 // for formatting 1287 1288 if (U_SUCCESS(status)) { 1289 if (number == U_INT64_MIN) { 1290 // We can't handle this value right now. Provide an accurate default value. 1291 1292 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1293 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); 1294 if (decimalFormat == nullptr) { 1295 return toAppendTo; 1296 } 1297 Formattable f; 1298 FieldPosition pos(FieldPosition::DONT_CARE); 1299 DecimalQuantity *decimalQuantity = new DecimalQuantity(); 1300 if (decimalQuantity == nullptr) { 1301 status = U_MEMORY_ALLOCATION_ERROR; 1302 delete decimalFormat; 1303 return toAppendTo; 1304 } 1305 decimalQuantity->setToLong(number); 1306 f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity. 1307 decimalFormat->format(f, toAppendTo, pos, status); 1308 delete decimalFormat; 1309 } 1310 else { 1311 int32_t startPos = toAppendTo.length(); 1312 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); 1313 adjustForCapitalizationContext(startPos, toAppendTo, status); 1314 } 1315 } 1316 return toAppendTo; 1317 } 1318 1319 UnicodeString& 1320 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1321 UnicodeString& currentResult, 1322 UErrorCode& status) const 1323 { 1324 #if !UCONFIG_NO_BREAK_ITERATION 1325 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1326 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) { 1327 // capitalize currentResult according to context 1328 UChar32 ch = currentResult.char32At(0); 1329 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != nullptr && 1330 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1331 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1332 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1333 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1334 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1335 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1336 } 1337 } 1338 #endif 1339 return currentResult; 1340 } 1341 1342 1343 void 1344 RuleBasedNumberFormat::parse(const UnicodeString& text, 1345 Formattable& result, 1346 ParsePosition& parsePosition) const 1347 { 1348 if (!fRuleSets) { 1349 parsePosition.setErrorIndex(0); 1350 return; 1351 } 1352 1353 UnicodeString workingText(text, parsePosition.getIndex()); 1354 ParsePosition workingPos(0); 1355 1356 ParsePosition high_pp(0); 1357 Formattable high_result; 1358 1359 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1360 NFRuleSet *rp = *p; 1361 if (rp->isPublic() && rp->isParseable()) { 1362 ParsePosition working_pp(0); 1363 Formattable working_result; 1364 1365 rp->parse(workingText, working_pp, kMaxDouble, 0, 0, working_result); 1366 if (working_pp.getIndex() > high_pp.getIndex()) { 1367 high_pp = working_pp; 1368 high_result = working_result; 1369 1370 if (high_pp.getIndex() == workingText.length()) { 1371 break; 1372 } 1373 } 1374 } 1375 } 1376 1377 int32_t startIndex = parsePosition.getIndex(); 1378 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1379 if (high_pp.getIndex() > 0) { 1380 parsePosition.setErrorIndex(-1); 1381 } else { 1382 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1383 parsePosition.setErrorIndex(startIndex + errorIndex); 1384 } 1385 result = high_result; 1386 if (result.getType() == Formattable::kDouble) { 1387 double d = result.getDouble(); 1388 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) { 1389 // Note: casting a double to an int when the double is too large or small 1390 // to fit the destination is undefined behavior. The explicit range checks, 1391 // above, are required. Just casting and checking the result value is undefined. 1392 result.setLong(static_cast<int32_t>(d)); 1393 } 1394 } 1395 } 1396 1397 #if !UCONFIG_NO_COLLATION 1398 1399 void 1400 RuleBasedNumberFormat::setLenient(UBool enabled) 1401 { 1402 lenient = enabled; 1403 if (!enabled && collator) { 1404 delete collator; 1405 collator = nullptr; 1406 } 1407 } 1408 1409 #endif 1410 1411 void 1412 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1413 if (U_SUCCESS(status)) { 1414 if (ruleSetName.isEmpty()) { 1415 if (localizations) { 1416 UnicodeString name(true, localizations->getRuleSetName(0), -1); 1417 defaultRuleSet = findRuleSet(name, status); 1418 } else { 1419 initDefaultRuleSet(); 1420 } 1421 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1422 status = U_ILLEGAL_ARGUMENT_ERROR; 1423 } else { 1424 NFRuleSet* result = findRuleSet(ruleSetName, status); 1425 if (result != nullptr) { 1426 defaultRuleSet = result; 1427 } 1428 } 1429 } 1430 } 1431 1432 UnicodeString 1433 RuleBasedNumberFormat::getDefaultRuleSetName() const { 1434 UnicodeString result; 1435 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1436 defaultRuleSet->getName(result); 1437 } else { 1438 result.setToBogus(); 1439 } 1440 return result; 1441 } 1442 1443 void 1444 RuleBasedNumberFormat::initDefaultRuleSet() 1445 { 1446 defaultRuleSet = nullptr; 1447 if (!fRuleSets) { 1448 return; 1449 } 1450 1451 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering")); 1452 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal")); 1453 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration")); 1454 1455 NFRuleSet**p = &fRuleSets[0]; 1456 while (*p) { 1457 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1458 defaultRuleSet = *p; 1459 return; 1460 } else { 1461 ++p; 1462 } 1463 } 1464 1465 defaultRuleSet = *--p; 1466 if (!defaultRuleSet->isPublic()) { 1467 while (p != fRuleSets) { 1468 if ((*--p)->isPublic()) { 1469 defaultRuleSet = *p; 1470 break; 1471 } 1472 } 1473 } 1474 } 1475 1476 1477 void 1478 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1479 UParseError& pErr, UErrorCode& status) 1480 { 1481 // TODO: implement UParseError 1482 uprv_memset(&pErr, 0, sizeof(UParseError)); 1483 // Note: this can leave ruleSets == nullptr, so remaining code should check 1484 if (U_FAILURE(status)) { 1485 return; 1486 } 1487 1488 initializeDecimalFormatSymbols(status); 1489 initializeDefaultInfinityRule(status); 1490 initializeDefaultNaNRule(status); 1491 if (U_FAILURE(status)) { 1492 return; 1493 } 1494 1495 this->localizations = localizationInfos == nullptr ? nullptr : localizationInfos->ref(); 1496 1497 UnicodeString description(rules); 1498 if (!description.length()) { 1499 status = U_MEMORY_ALLOCATION_ERROR; 1500 return; 1501 } 1502 1503 // start by stripping the trailing whitespace from all the rules 1504 // (this is all the whitespace following each semicolon in the 1505 // description). This allows us to look for rule-set boundaries 1506 // by searching for ";%" without having to worry about whitespace 1507 // between the ; and the % 1508 stripWhitespace(description); 1509 1510 // check to see if there's a set of lenient-parse rules. If there 1511 // is, pull them out into our temporary holding place for them, 1512 // and delete them from the description before the real desciption- 1513 // parsing code sees them 1514 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1515 if (lp != -1) { 1516 // we've got to make sure we're not in the middle of a rule 1517 // (where "%%lenient-parse" would actually get treated as 1518 // rule text) 1519 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1520 // locate the beginning and end of the actual collation 1521 // rules (there may be whitespace between the name and 1522 // the first token in the description) 1523 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1524 1525 if (lpEnd == -1) { 1526 lpEnd = description.length() - 1; 1527 } 1528 int lpStart = lp + u_strlen(gLenientParse); 1529 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1530 ++lpStart; 1531 } 1532 1533 // copy out the lenient-parse rules and delete them 1534 // from the description 1535 lenientParseRules = new UnicodeString(); 1536 /* test for nullptr */ 1537 if (lenientParseRules == nullptr) { 1538 status = U_MEMORY_ALLOCATION_ERROR; 1539 return; 1540 } 1541 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1542 1543 description.remove(lp, lpEnd + 1 - lp); 1544 } 1545 } 1546 1547 // pre-flight parsing the description and count the number of 1548 // rule sets (";%" marks the end of one rule set and the beginning 1549 // of the next) 1550 numRuleSets = 0; 1551 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1552 ++numRuleSets; 1553 ++p; 1554 } 1555 ++numRuleSets; 1556 1557 // our rule list is an array of the appropriate size 1558 fRuleSets = static_cast<NFRuleSet**>(uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet*))); 1559 /* test for nullptr */ 1560 if (fRuleSets == nullptr) { 1561 status = U_MEMORY_ALLOCATION_ERROR; 1562 return; 1563 } 1564 1565 for (int i = 0; i <= numRuleSets; ++i) { 1566 fRuleSets[i] = nullptr; 1567 } 1568 1569 // divide up the descriptions into individual rule-set descriptions 1570 // and store them in a temporary array. At each step, we also 1571 // create a rule set, but all this does is initialize its name 1572 // and remove it from its description. We can't actually parse 1573 // the rest of the descriptions and finish initializing everything 1574 // because we have to know the names and locations of all the rule 1575 // sets before we can actually set everything up 1576 if (!numRuleSets) { 1577 status = U_ILLEGAL_ARGUMENT_ERROR; 1578 return; 1579 } 1580 1581 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1582 if (ruleSetDescriptions == nullptr) { 1583 status = U_MEMORY_ALLOCATION_ERROR; 1584 return; 1585 } 1586 1587 { 1588 int curRuleSet = 0; 1589 int32_t start = 0; 1590 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1591 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1592 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1593 if (fRuleSets[curRuleSet] == nullptr) { 1594 status = U_MEMORY_ALLOCATION_ERROR; 1595 } 1596 if (U_FAILURE(status)) { 1597 return; 1598 } 1599 ++curRuleSet; 1600 start = p + 1; 1601 } 1602 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1603 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1604 if (fRuleSets[curRuleSet] == nullptr) { 1605 status = U_MEMORY_ALLOCATION_ERROR; 1606 } 1607 if (U_FAILURE(status)) { 1608 return; 1609 } 1610 } 1611 1612 // now we can take note of the formatter's default rule set, which 1613 // is the last public rule set in the description (it's the last 1614 // rather than the first so that a user can create a new formatter 1615 // from an existing formatter and change its default behavior just 1616 // by appending more rule sets to the end) 1617 1618 // {dlf} Initialization of a fraction rule set requires the default rule 1619 // set to be known. For purposes of initialization, this is always the 1620 // last public rule set, no matter what the localization data says. 1621 initDefaultRuleSet(); 1622 1623 // Now that we know all the rule names, we can go back through 1624 // the temporary descriptions list and finish setting up the substructure 1625 // (and we throw away the temporary descriptions as we go) 1626 { 1627 for (int i = 0; i < numRuleSets; i++) { 1628 fRuleSets[i]->parseRules(ruleSetDescriptions[i], status); 1629 if (U_FAILURE(status)) { 1630 return; 1631 } 1632 } 1633 } 1634 1635 // Now that the rules are initialized, the 'real' default rule 1636 // set can be adjusted by the localization data. 1637 1638 // The C code keeps the localization array as is, rather than building 1639 // a separate array of the public rule set names, so we have less work 1640 // to do here-- but we still need to check the names. 1641 1642 if (localizationInfos) { 1643 // confirm the names, if any aren't in the rules, that's an error 1644 // it is ok if the rules contain public rule sets that are not in this list 1645 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1646 UnicodeString name(true, localizationInfos->getRuleSetName(i), -1); 1647 NFRuleSet* rs = findRuleSet(name, status); 1648 if (rs == nullptr) { 1649 break; // error 1650 } 1651 if (U_FAILURE(status)) { 1652 return; 1653 } 1654 if (i == 0) { 1655 defaultRuleSet = rs; 1656 } 1657 } 1658 } else { 1659 defaultRuleSet = getDefaultRuleSet(); 1660 } 1661 originalDescription = rules; 1662 } 1663 1664 // override the NumberFormat implementation in order to 1665 // lazily initialize relevant items 1666 void 1667 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1668 { 1669 NumberFormat::setContext(value, status); 1670 if (U_SUCCESS(status)) { 1671 if (!capitalizationInfoSet && 1672 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1673 initCapitalizationContextInfo(locale); 1674 capitalizationInfoSet = true; 1675 } 1676 #if !UCONFIG_NO_BREAK_ITERATION 1677 if ( capitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1678 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1679 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1680 status = U_ZERO_ERROR; 1681 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1682 if (U_FAILURE(status)) { 1683 delete capitalizationBrkIter; 1684 capitalizationBrkIter = nullptr; 1685 } 1686 } 1687 #endif 1688 } 1689 } 1690 1691 void 1692 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1693 { 1694 #if !UCONFIG_NO_BREAK_ITERATION 1695 const char * localeID = (thelocale != nullptr)? thelocale.getBaseName(): nullptr; 1696 UErrorCode status = U_ZERO_ERROR; 1697 UResourceBundle *rb = ures_open(nullptr, localeID, &status); 1698 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1699 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1700 if (U_SUCCESS(status) && rb != nullptr) { 1701 int32_t len = 0; 1702 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1703 if (U_SUCCESS(status) && intVector != nullptr && len >= 2) { 1704 capitalizationForUIListMenu = static_cast<UBool>(intVector[0]); 1705 capitalizationForStandAlone = static_cast<UBool>(intVector[1]); 1706 } 1707 } 1708 ures_close(rb); 1709 #endif 1710 } 1711 1712 void 1713 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1714 { 1715 // iterate through the characters... 1716 UnicodeString result; 1717 1718 int start = 0; 1719 UChar ch; 1720 while (start < description.length()) { 1721 // Seek to the first non-whitespace character... 1722 // If the first non-whitespace character is semicolon, skip it and continue 1723 while (start < description.length() 1724 && (PatternProps::isWhiteSpace(ch = description.charAt(start)) || ch == gSemiColon)) 1725 { 1726 ++start; 1727 } 1728 1729 // locate the next semicolon in the text and copy the text from 1730 // our current position up to that semicolon into the result 1731 int32_t p = description.indexOf(gSemiColon, start); 1732 if (p == -1) { 1733 // or if we don't find a semicolon, just copy the rest of 1734 // the string into the result 1735 result.append(description, start, description.length() - start); 1736 break; 1737 } 1738 else if (p < description.length()) { 1739 result.append(description, start, p + 1 - start); 1740 start = p + 1; 1741 } 1742 // when we get here from the else, we've seeked off the end of the string, and 1743 // we terminate the loop (we continue until *start* is -1 rather 1744 // than until *p* is -1, because otherwise we'd miss the last 1745 // rule in the description) 1746 } 1747 1748 description.setTo(result); 1749 } 1750 1751 1752 void 1753 RuleBasedNumberFormat::dispose() 1754 { 1755 if (fRuleSets) { 1756 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1757 delete *p; 1758 } 1759 uprv_free(fRuleSets); 1760 fRuleSets = nullptr; 1761 } 1762 1763 if (ruleSetDescriptions) { 1764 delete [] ruleSetDescriptions; 1765 ruleSetDescriptions = nullptr; 1766 } 1767 1768 #if !UCONFIG_NO_COLLATION 1769 delete collator; 1770 #endif 1771 collator = nullptr; 1772 1773 delete decimalFormatSymbols; 1774 decimalFormatSymbols = nullptr; 1775 1776 delete defaultInfinityRule; 1777 defaultInfinityRule = nullptr; 1778 1779 delete defaultNaNRule; 1780 defaultNaNRule = nullptr; 1781 1782 delete lenientParseRules; 1783 lenientParseRules = nullptr; 1784 1785 #if !UCONFIG_NO_BREAK_ITERATION 1786 delete capitalizationBrkIter; 1787 capitalizationBrkIter = nullptr; 1788 #endif 1789 1790 if (localizations) { 1791 localizations = localizations->unref(); 1792 } 1793 } 1794 1795 1796 //----------------------------------------------------------------------- 1797 // package-internal API 1798 //----------------------------------------------------------------------- 1799 1800 /** 1801 * Returns the collator to use for lenient parsing. The collator is lazily created: 1802 * this function creates it the first time it's called. 1803 * @return The collator to use for lenient parsing, or null if lenient parsing 1804 * is turned off. 1805 */ 1806 const RuleBasedCollator* 1807 RuleBasedNumberFormat::getCollator() const 1808 { 1809 #if !UCONFIG_NO_COLLATION 1810 if (!fRuleSets) { 1811 return nullptr; 1812 } 1813 1814 // lazy-evaluate the collator 1815 if (collator == nullptr && lenient) { 1816 // create a default collator based on the formatter's locale, 1817 // then pull out that collator's rules, append any additional 1818 // rules specified in the description, and create a _new_ 1819 // collator based on the combination of those rules 1820 1821 UErrorCode status = U_ZERO_ERROR; 1822 1823 Collator* temp = Collator::createInstance(locale, status); 1824 RuleBasedCollator* newCollator; 1825 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != nullptr) { 1826 if (lenientParseRules) { 1827 UnicodeString rules(newCollator->getRules()); 1828 rules.append(*lenientParseRules); 1829 1830 newCollator = new RuleBasedCollator(rules, status); 1831 // Exit if newCollator could not be created. 1832 if (newCollator == nullptr) { 1833 return nullptr; 1834 } 1835 } else { 1836 temp = nullptr; 1837 } 1838 if (U_SUCCESS(status)) { 1839 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1840 // cast away const 1841 const_cast<RuleBasedNumberFormat*>(this)->collator = newCollator; 1842 } else { 1843 delete newCollator; 1844 } 1845 } 1846 delete temp; 1847 } 1848 #endif 1849 1850 // if lenient-parse mode is off, this will be null 1851 // (see setLenientParseMode()) 1852 return collator; 1853 } 1854 1855 1856 DecimalFormatSymbols* 1857 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) 1858 { 1859 // lazy-evaluate the DecimalFormatSymbols object. This object 1860 // is shared by all DecimalFormat instances belonging to this 1861 // formatter 1862 if (decimalFormatSymbols == nullptr) { 1863 LocalPointer<DecimalFormatSymbols> temp(new DecimalFormatSymbols(locale, status), status); 1864 if (U_SUCCESS(status)) { 1865 decimalFormatSymbols = temp.orphan(); 1866 } 1867 } 1868 return decimalFormatSymbols; 1869 } 1870 1871 /** 1872 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1873 * instances owned by this formatter. 1874 */ 1875 const DecimalFormatSymbols* 1876 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1877 { 1878 return decimalFormatSymbols; 1879 } 1880 1881 NFRule* 1882 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) 1883 { 1884 if (U_FAILURE(status)) { 1885 return nullptr; 1886 } 1887 if (defaultInfinityRule == nullptr) { 1888 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); 1889 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); 1890 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status); 1891 if (U_SUCCESS(status)) { 1892 defaultInfinityRule = temp.orphan(); 1893 } 1894 } 1895 return defaultInfinityRule; 1896 } 1897 1898 const NFRule* 1899 RuleBasedNumberFormat::getDefaultInfinityRule() const 1900 { 1901 return defaultInfinityRule; 1902 } 1903 1904 NFRule* 1905 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) 1906 { 1907 if (U_FAILURE(status)) { 1908 return nullptr; 1909 } 1910 if (defaultNaNRule == nullptr) { 1911 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); 1912 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); 1913 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status); 1914 if (U_SUCCESS(status)) { 1915 defaultNaNRule = temp.orphan(); 1916 } 1917 } 1918 return defaultNaNRule; 1919 } 1920 1921 const NFRule* 1922 RuleBasedNumberFormat::getDefaultNaNRule() const 1923 { 1924 return defaultNaNRule; 1925 } 1926 1927 // De-owning the current localized symbols and adopt the new symbols. 1928 void 1929 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1930 { 1931 if (symbolsToAdopt == nullptr) { 1932 return; // do not allow caller to set decimalFormatSymbols to nullptr 1933 } 1934 1935 delete decimalFormatSymbols; 1936 decimalFormatSymbols = symbolsToAdopt; 1937 1938 { 1939 // Apply the new decimalFormatSymbols by reparsing the rulesets 1940 UErrorCode status = U_ZERO_ERROR; 1941 1942 delete defaultInfinityRule; 1943 defaultInfinityRule = nullptr; 1944 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols 1945 1946 delete defaultNaNRule; 1947 defaultNaNRule = nullptr; 1948 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols 1949 1950 if (fRuleSets) { 1951 for (int32_t i = 0; i < numRuleSets; i++) { 1952 fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); 1953 } 1954 } 1955 } 1956 } 1957 1958 // Setting the symbols is equivalent to adopting a newly created localized symbols. 1959 void 1960 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1961 { 1962 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1963 } 1964 1965 PluralFormat * 1966 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1967 const UnicodeString &pattern, 1968 UErrorCode& status) const 1969 { 1970 auto *pf = new PluralFormat(locale, pluralType, pattern, status); 1971 if (pf == nullptr) { 1972 status = U_MEMORY_ALLOCATION_ERROR; 1973 } 1974 return pf; 1975 } 1976 1977 /** 1978 * Get the rounding mode. 1979 * @return A rounding mode 1980 */ 1981 DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const { 1982 return fRoundingMode; 1983 } 1984 1985 /** 1986 * Set the rounding mode. This has no effect unless the rounding 1987 * increment is greater than zero. 1988 * @param roundingMode A rounding mode 1989 */ 1990 void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) { 1991 fRoundingMode = roundingMode; 1992 } 1993 1994 U_NAMESPACE_END 1995 1996 /* U_HAVE_RBNF */ 1997 #endif