nfrs.cpp (33298B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * Copyright (C) 1997-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ****************************************************************************** 8 * file name: nfrs.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * Modification history 14 * Date Name Comments 15 * 10/11/2001 Doug Ported from ICU4J 16 */ 17 18 #include "nfrs.h" 19 20 #if U_HAVE_RBNF 21 22 #include "unicode/uchar.h" 23 #include "nfrule.h" 24 #include "nfrlist.h" 25 #include "patternprops.h" 26 #include "putilimp.h" 27 28 #ifdef RBNF_DEBUG 29 #include "cmemory.h" 30 #endif 31 32 enum { 33 /** -x */ 34 NEGATIVE_RULE_INDEX = 0, 35 /** x.x */ 36 IMPROPER_FRACTION_RULE_INDEX = 1, 37 /** 0.x */ 38 PROPER_FRACTION_RULE_INDEX = 2, 39 /** x.0 */ 40 DEFAULT_RULE_INDEX = 3, 41 /** Inf */ 42 INFINITY_RULE_INDEX = 4, 43 /** NaN */ 44 NAN_RULE_INDEX = 5, 45 NON_NUMERICAL_RULE_LENGTH = 6 46 }; 47 48 U_NAMESPACE_BEGIN 49 50 #if 0 51 // euclid's algorithm works with doubles 52 // note, doubles only get us up to one quadrillion or so, which 53 // isn't as much range as we get with longs. We probably still 54 // want either 64-bit math, or BigInteger. 55 56 static int64_t 57 util_lcm(int64_t x, int64_t y) 58 { 59 x.abs(); 60 y.abs(); 61 62 if (x == 0 || y == 0) { 63 return 0; 64 } else { 65 do { 66 if (x < y) { 67 int64_t t = x; x = y; y = t; 68 } 69 x -= y * (x/y); 70 } while (x != 0); 71 72 return y; 73 } 74 } 75 76 #else 77 /** 78 * Calculates the least common multiple of x and y. 79 */ 80 static int64_t 81 util_lcm(int64_t x, int64_t y) 82 { 83 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 84 // vol. 2, 1st ed., pp. 298-299 85 int64_t x1 = x; 86 int64_t y1 = y; 87 88 int p2 = 0; 89 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 90 ++p2; 91 x1 >>= 1; 92 y1 >>= 1; 93 } 94 95 int64_t t; 96 if ((x1 & 1) == 1) { 97 t = -y1; 98 } else { 99 t = x1; 100 } 101 102 while (t != 0) { 103 while ((t & 1) == 0) { 104 t = t >> 1; 105 } 106 if (t > 0) { 107 x1 = t; 108 } else { 109 y1 = -t; 110 } 111 t = x1 - y1; 112 } 113 114 int64_t gcd = x1 << p2; 115 116 // x * y == gcd(x, y) * lcm(x, y) 117 return x / gcd * y; 118 } 119 #endif 120 121 static const char16_t gPercent = 0x0025; 122 static const char16_t gColon = 0x003a; 123 static const char16_t gSemicolon = 0x003b; 124 static const char16_t gLineFeed = 0x000a; 125 126 static const char16_t gPercentPercent[] = 127 { 128 0x25, 0x25, 0 129 }; /* "%%" */ 130 131 static const char16_t gNoparse[] = 132 { 133 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 134 }; /* "@noparse" */ 135 136 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) 137 : name() 138 , rules(0) 139 , owner(_owner) 140 , fractionRules() 141 , fIsFractionRuleSet(false) 142 , fIsPublic(false) 143 , fIsParseable(true) 144 { 145 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 146 nonNumericalRules[i] = nullptr; 147 } 148 149 if (U_FAILURE(status)) { 150 return; 151 } 152 153 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 154 155 if (description.isEmpty()) { 156 // throw new IllegalArgumentException("Empty rule set description"); 157 status = U_PARSE_ERROR; 158 return; 159 } 160 161 // if the description begins with a rule set name (the rule set 162 // name can be omitted in formatter descriptions that consist 163 // of only one rule set), copy it out into our "name" member 164 // and delete it from the description 165 if (description.charAt(0) == gPercent) { 166 int32_t pos = description.indexOf(gColon); 167 // if there are no name or the name is "%". 168 if (pos < 2) { 169 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 170 status = U_PARSE_ERROR; 171 return; 172 } else { 173 name.setTo(description, 0, pos); 174 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 175 } 176 description.remove(0, pos); 177 } 178 } else { 179 name.setTo(UNICODE_STRING_SIMPLE("%default")); 180 } 181 182 if (description.isEmpty()) { 183 // throw new IllegalArgumentException("Empty rule set description"); 184 status = U_PARSE_ERROR; 185 return; 186 } 187 188 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; 189 190 if (name.endsWith(gNoparse, 8)) { 191 fIsParseable = false; 192 name.truncate(name.length() - 8); // remove the @noparse from the name 193 } 194 195 // all of the other members of NFRuleSet are initialized 196 // by parseRules() 197 } 198 199 void 200 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) 201 { 202 // start by creating a Vector whose elements are Strings containing 203 // the descriptions of the rules (one rule per element). The rules 204 // are separated by semicolons (there's no escape facility: ALL 205 // semicolons are rule delimiters) 206 207 if (U_FAILURE(status)) { 208 return; 209 } 210 211 // ensure we are starting with an empty rule list 212 rules.deleteAll(); 213 214 // dlf - the original code kept a separate description array for no reason, 215 // so I got rid of it. The loop was too complex so I simplified it. 216 217 UnicodeString currentDescription; 218 int32_t oldP = 0; 219 while (oldP < description.length()) { 220 int32_t p = description.indexOf(gSemicolon, oldP); 221 if (p == -1) { 222 p = description.length(); 223 } 224 currentDescription.setTo(description, oldP, p - oldP); 225 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 226 if (U_FAILURE(status)) { 227 return; 228 } 229 oldP = p + 1; 230 } 231 232 // for rules that didn't specify a base value, their base values 233 // were initialized to 0. Make another pass through the list and 234 // set all those rules' base values. We also remove any special 235 // rules from the list and put them into their own member variables 236 int64_t defaultBaseValue = 0; 237 238 // (this isn't a for loop because we might be deleting items from 239 // the vector-- we want to make sure we only increment i when 240 // we _didn't_ delete anything from the vector) 241 int32_t rulesSize = rules.size(); 242 for (int32_t i = 0; i < rulesSize; i++) { 243 NFRule* rule = rules[i]; 244 int64_t baseValue = rule->getBaseValue(); 245 246 if (baseValue == 0) { 247 // if the rule's base value is 0, fill in a default 248 // base value (this will be 1 plus the preceding 249 // rule's base value for regular rule sets, and the 250 // same as the preceding rule's base value in fraction 251 // rule sets) 252 rule->setBaseValue(defaultBaseValue, status); 253 if (U_FAILURE(status)) { 254 return; 255 } 256 } 257 else { 258 // if it's a regular rule that already knows its base value, 259 // check to make sure the rules are in order, and update 260 // the default base value for the next rule 261 if (baseValue < defaultBaseValue) { 262 // throw new IllegalArgumentException("Rules are not in order"); 263 status = U_PARSE_ERROR; 264 return; 265 } 266 defaultBaseValue = baseValue; 267 } 268 if (!fIsFractionRuleSet) { 269 ++defaultBaseValue; 270 } 271 } 272 } 273 274 /** 275 * Set one of the non-numerical rules. 276 * @param rule The rule to set. 277 */ 278 void NFRuleSet::setNonNumericalRule(NFRule *rule) { 279 switch (rule->getBaseValue()) { 280 case NFRule::kNegativeNumberRule: 281 delete nonNumericalRules[NEGATIVE_RULE_INDEX]; 282 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; 283 return; 284 case NFRule::kImproperFractionRule: 285 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true); 286 return; 287 case NFRule::kProperFractionRule: 288 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true); 289 return; 290 case NFRule::kDefaultRule: 291 setBestFractionRule(DEFAULT_RULE_INDEX, rule, true); 292 return; 293 case NFRule::kInfinityRule: 294 delete nonNumericalRules[INFINITY_RULE_INDEX]; 295 nonNumericalRules[INFINITY_RULE_INDEX] = rule; 296 return; 297 case NFRule::kNaNRule: 298 delete nonNumericalRules[NAN_RULE_INDEX]; 299 nonNumericalRules[NAN_RULE_INDEX] = rule; 300 return; 301 case NFRule::kNoBase: 302 case NFRule::kOtherRule: 303 default: 304 // If we do not remember the rule inside the object. 305 // delete it here to prevent memory leak. 306 delete rule; 307 return; 308 } 309 } 310 311 /** 312 * Determine the best fraction rule to use. Rules matching the decimal point from 313 * DecimalFormatSymbols become the main set of rules to use. 314 * @param originalIndex The index into nonNumericalRules 315 * @param newRule The new rule to consider 316 * @param rememberRule Should the new rule be added to fractionRules. 317 */ 318 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { 319 if (rememberRule) { 320 fractionRules.add(newRule); 321 } 322 NFRule *bestResult = nonNumericalRules[originalIndex]; 323 if (bestResult == nullptr) { 324 nonNumericalRules[originalIndex] = newRule; 325 } 326 else { 327 // We have more than one. Which one is better? 328 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); 329 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) 330 == newRule->getDecimalPoint()) 331 { 332 nonNumericalRules[originalIndex] = newRule; 333 } 334 // else leave it alone 335 } 336 } 337 338 NFRuleSet::~NFRuleSet() 339 { 340 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 341 if (i != IMPROPER_FRACTION_RULE_INDEX 342 && i != PROPER_FRACTION_RULE_INDEX 343 && i != DEFAULT_RULE_INDEX) 344 { 345 delete nonNumericalRules[i]; 346 } 347 // else it will be deleted via NFRuleList fractionRules 348 } 349 } 350 351 static UBool 352 util_equalRules(const NFRule* rule1, const NFRule* rule2) 353 { 354 if (rule1) { 355 if (rule2) { 356 return *rule1 == *rule2; 357 } 358 } else if (!rule2) { 359 return true; 360 } 361 return false; 362 } 363 364 bool 365 NFRuleSet::operator==(const NFRuleSet& rhs) const 366 { 367 if (rules.size() == rhs.rules.size() && 368 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 369 name == rhs.name) { 370 371 // ...then compare the non-numerical rule lists... 372 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 373 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { 374 return false; 375 } 376 } 377 378 // ...then compare the rule lists... 379 for (uint32_t i = 0; i < rules.size(); ++i) { 380 if (*rules[i] != *rhs.rules[i]) { 381 return false; 382 } 383 } 384 return true; 385 } 386 return false; 387 } 388 389 void 390 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { 391 for (uint32_t i = 0; i < rules.size(); ++i) { 392 rules[i]->setDecimalFormatSymbols(newSymbols, status); 393 } 394 // Switch the fraction rules to mirror the DecimalFormatSymbols. 395 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) { 396 if (nonNumericalRules[nonNumericalIdx]) { 397 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 398 NFRule *fractionRule = fractionRules[fIdx]; 399 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { 400 setBestFractionRule(nonNumericalIdx, fractionRule, false); 401 } 402 } 403 } 404 } 405 406 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { 407 NFRule *rule = nonNumericalRules[nnrIdx]; 408 if (rule) { 409 rule->setDecimalFormatSymbols(newSymbols, status); 410 } 411 } 412 } 413 414 #define RECURSION_LIMIT 64 415 416 void 417 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 418 { 419 if (recursionCount >= RECURSION_LIMIT) { 420 // stop recursion 421 status = U_INVALID_STATE_ERROR; 422 return; 423 } 424 const NFRule *rule = findNormalRule(number); 425 if (rule) { // else error, but can't report it 426 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 427 } 428 } 429 430 void 431 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 432 { 433 if (recursionCount >= RECURSION_LIMIT) { 434 // stop recursion 435 status = U_INVALID_STATE_ERROR; 436 return; 437 } 438 const NFRule *rule = findDoubleRule(number); 439 if (rule) { // else error, but can't report it 440 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 441 } 442 } 443 444 const NFRule* 445 NFRuleSet::findDoubleRule(double number) const 446 { 447 // if this is a fraction rule set, use findFractionRuleSetRule() 448 if (isFractionRuleSet()) { 449 return findFractionRuleSetRule(number); 450 } 451 452 if (uprv_isNaN(number)) { 453 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; 454 if (!rule) { 455 rule = owner->getDefaultNaNRule(); 456 } 457 return rule; 458 } 459 460 // if the number is negative, return the negative number rule 461 // (if there isn't a negative-number rule, we pretend it's a 462 // positive number) 463 if (number < 0) { 464 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 465 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 466 } else { 467 number = -number; 468 } 469 } 470 471 if (uprv_isInfinite(number)) { 472 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; 473 if (!rule) { 474 rule = owner->getDefaultInfinityRule(); 475 } 476 return rule; 477 } 478 479 // if the number isn't an integer, we use one of the fraction rules... 480 if (number != uprv_floor(number)) { 481 // if the number is between 0 and 1, return the proper 482 // fraction rule 483 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { 484 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; 485 } 486 // otherwise, return the improper fraction rule 487 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { 488 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; 489 } 490 } 491 492 // if there's a default rule, use it to format the number 493 if (nonNumericalRules[DEFAULT_RULE_INDEX]) { 494 return nonNumericalRules[DEFAULT_RULE_INDEX]; 495 } 496 497 // and if we haven't yet returned a rule, use findNormalRule() 498 // to find the applicable rule 499 int64_t r = util64_fromDouble(number + 0.5); 500 return findNormalRule(r); 501 } 502 503 const NFRule * 504 NFRuleSet::findNormalRule(int64_t number) const 505 { 506 // if this is a fraction rule set, use findFractionRuleSetRule() 507 // to find the rule (we should only go into this clause if the 508 // value is 0) 509 if (fIsFractionRuleSet) { 510 return findFractionRuleSetRule(static_cast<double>(number)); 511 } 512 513 // if the number is negative, return the negative-number rule 514 // (if there isn't one, pretend the number is positive) 515 if (number < 0) { 516 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 517 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 518 } else { 519 number = -number; 520 } 521 } 522 523 // we have to repeat the preceding two checks, even though we 524 // do them in findRule(), because the version of format() that 525 // takes a long bypasses findRule() and goes straight to this 526 // function. This function does skip the fraction rules since 527 // we know the value is an integer (it also skips the default 528 // rule, since it's considered a fraction rule. Skipping the 529 // default rule in this function is also how we avoid infinite 530 // recursion) 531 532 // {dlf} unfortunately this fails if there are no rules except 533 // special rules. If there are no rules, use the default rule. 534 535 // binary-search the rule list for the applicable rule 536 // (a rule is used for all values from its base value to 537 // the next rule's base value) 538 int32_t hi = rules.size(); 539 if (hi > 0) { 540 int32_t lo = 0; 541 542 while (lo < hi) { 543 int32_t mid = (lo + hi) / 2; 544 if (rules[mid]->getBaseValue() == number) { 545 return rules[mid]; 546 } 547 else if (rules[mid]->getBaseValue() > number) { 548 hi = mid; 549 } 550 else { 551 lo = mid + 1; 552 } 553 } 554 if (hi == 0) { // bad rule set, minimum base > 0 555 return nullptr; // want to throw exception here 556 } 557 558 NFRule *result = rules[hi - 1]; 559 560 // use shouldRollBack() to see whether we need to invoke the 561 // rollback rule (see shouldRollBack()'s documentation for 562 // an explanation of the rollback rule). If we do, roll back 563 // one rule and return that one instead of the one we'd normally 564 // return 565 if (result->shouldRollBack(number)) { 566 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 567 return nullptr; 568 } 569 result = rules[hi - 2]; 570 } 571 return result; 572 } 573 // else use the default rule 574 return nonNumericalRules[DEFAULT_RULE_INDEX]; 575 } 576 577 /** 578 * If this rule is a fraction rule set, this function is used by 579 * findRule() to select the most appropriate rule for formatting 580 * the number. Basically, the base value of each rule in the rule 581 * set is treated as the denominator of a fraction. Whichever 582 * denominator can produce the fraction closest in value to the 583 * number passed in is the result. If there's a tie, the earlier 584 * one in the list wins. (If there are two rules in a row with the 585 * same base value, the first one is used when the numerator of the 586 * fraction would be 1, and the second rule is used the rest of the 587 * time. 588 * @param number The number being formatted (which will always be 589 * a number between 0 and 1) 590 * @return The rule to use to format this number 591 */ 592 const NFRule* 593 NFRuleSet::findFractionRuleSetRule(double number) const 594 { 595 // the obvious way to do this (multiply the value being formatted 596 // by each rule's base value until you get an integral result) 597 // doesn't work because of rounding error. This method is more 598 // accurate 599 600 // find the least common multiple of the rules' base values 601 // and multiply this by the number being formatted. This is 602 // all the precision we need, and we can do all of the rest 603 // of the math using integer arithmetic 604 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 605 int64_t numerator; 606 { 607 for (uint32_t i = 1; i < rules.size(); ++i) { 608 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 609 } 610 numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5); 611 } 612 // for each rule, do the following... 613 int64_t tempDifference; 614 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 615 int32_t winner = 0; 616 for (uint32_t i = 0; i < rules.size(); ++i) { 617 // "numerator" is the numerator of the fraction if the 618 // denominator is the LCD. The numerator if the rule's 619 // base value is the denominator is "numerator" times the 620 // base value divided bythe LCD. Here we check to see if 621 // that's an integer, and if not, how close it is to being 622 // an integer. 623 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 624 625 626 // normalize the result of the above calculation: we want 627 // the numerator's distance from the CLOSEST multiple 628 // of the LCD 629 if (leastCommonMultiple - tempDifference < tempDifference) { 630 tempDifference = leastCommonMultiple - tempDifference; 631 } 632 633 // if this is as close as we've come, keep track of how close 634 // that is, and the line number of the rule that did it. If 635 // we've scored a direct hit, we don't have to look at any more 636 // rules 637 if (tempDifference < difference) { 638 difference = tempDifference; 639 winner = i; 640 if (difference == 0) { 641 break; 642 } 643 } 644 } 645 646 // if we have two successive rules that both have the winning base 647 // value, then the first one (the one we found above) is used if 648 // the numerator of the fraction is 1 and the second one is used if 649 // the numerator of the fraction is anything else (this lets us 650 // do things like "one third"/"two thirds" without having to define 651 // a whole bunch of extra rule sets) 652 if (static_cast<unsigned>(winner + 1) < rules.size() && 653 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 654 double n = static_cast<double>(rules[winner]->getBaseValue()) * number; 655 if (n < 0.5 || n >= 2) { 656 ++winner; 657 } 658 } 659 660 // finally, return the winning rule 661 return rules[winner]; 662 } 663 664 /** 665 * Parses a string. Matches the string to be parsed against each 666 * of its rules (with a base value less than upperBound) and returns 667 * the value produced by the rule that matched the most characters 668 * in the source string. 669 * @param text The string to parse 670 * @param parsePosition The initial position is ignored and assumed 671 * to be 0. On exit, this object has been updated to point to the 672 * first character position this rule set didn't consume. 673 * @param upperBound Limits the rules that can be allowed to match. 674 * Only rules whose base values are strictly less than upperBound 675 * are considered. 676 * @return The numerical result of parsing this string. This will 677 * be the matching rule's base value, composed appropriately with 678 * the results of matching any of its substitutions. The object 679 * will be an instance of Long if it's an integral value; otherwise, 680 * it will be an instance of Double. This function always returns 681 * a valid object: If nothing matched the input string at all, 682 * this function returns new Long(0), and the parse position is 683 * left unchanged. 684 */ 685 #ifdef RBNF_DEBUG 686 #include <stdio.h> 687 688 static void dumpUS(FILE* f, const UnicodeString& us) { 689 int len = us.length(); 690 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 691 if (buf != nullptr) { 692 us.extract(0, len, buf); 693 buf[len] = 0; 694 fprintf(f, "%s", buf); 695 uprv_free(buf); //delete[] buf; 696 } 697 } 698 #endif 699 700 UBool 701 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const 702 { 703 // try matching each rule in the rule set against the text being 704 // parsed. Whichever one matches the most characters is the one 705 // that determines the value we return. 706 707 result.setLong(0); 708 709 // dump out if we've reached the recursion limit 710 if (recursionCount >= RECURSION_LIMIT) { 711 // stop recursion 712 return false; 713 } 714 715 // dump out if there's no text to parse 716 if (text.length() == 0) { 717 return 0; 718 } 719 720 ParsePosition highWaterMark; 721 ParsePosition workingPos = pos; 722 723 #ifdef RBNF_DEBUG 724 fprintf(stderr, "<nfrs> %x '", this); 725 dumpUS(stderr, name); 726 fprintf(stderr, "' text '"); 727 dumpUS(stderr, text); 728 fprintf(stderr, "'\n"); 729 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 730 #endif 731 // Try each of the negative rules, fraction rules, infinity rules and NaN rules 732 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 733 if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { 734 // Mark this rule as being executed so that we don't try to execute it again. 735 nonNumericalExecutedRuleMask |= 1 << i; 736 737 Formattable tempResult; 738 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult); 739 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 740 result = tempResult; 741 highWaterMark = workingPos; 742 } 743 workingPos = pos; 744 } 745 } 746 #ifdef RBNF_DEBUG 747 fprintf(stderr, "<nfrs> continue other with text '"); 748 dumpUS(stderr, text); 749 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 750 #endif 751 752 // finally, go through the regular rules one at a time. We start 753 // at the end of the list because we want to try matching the most 754 // sigificant rule first (this helps ensure that we parse 755 // "five thousand three hundred six" as 756 // "(five thousand) (three hundred) (six)" rather than 757 // "((five thousand three) hundred) (six)"). Skip rules whose 758 // base values are higher than the upper bound (again, this helps 759 // limit ambiguity by making sure the rules that match a rule's 760 // are less significant than the rule containing the substitutions)/ 761 { 762 int64_t ub = util64_fromDouble(upperBound); 763 #ifdef RBNF_DEBUG 764 { 765 char ubstr[64]; 766 util64_toa(ub, ubstr, 64); 767 char ubstrhex[64]; 768 util64_toa(ub, ubstrhex, 64, 16); 769 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 770 } 771 #endif 772 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 773 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 774 continue; 775 } 776 Formattable tempResult; 777 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult); 778 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 779 result = tempResult; 780 highWaterMark = workingPos; 781 } 782 workingPos = pos; 783 } 784 } 785 #ifdef RBNF_DEBUG 786 fprintf(stderr, "<nfrs> exit\n"); 787 #endif 788 // finally, update the parse position we were passed to point to the 789 // first character we didn't use, and return the result that 790 // corresponds to that string of characters 791 pos = highWaterMark; 792 793 return 1; 794 } 795 796 void 797 NFRuleSet::appendRules(UnicodeString& result) const 798 { 799 uint32_t i; 800 801 // the rule set name goes first... 802 result.append(name); 803 result.append(gColon); 804 result.append(gLineFeed); 805 806 // followed by the regular rules... 807 for (i = 0; i < rules.size(); i++) { 808 rules[i]->_appendRuleText(result); 809 result.append(gLineFeed); 810 } 811 812 // followed by the special rules (if they exist) 813 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 814 NFRule *rule = nonNumericalRules[i]; 815 if (nonNumericalRules[i]) { 816 if (rule->getBaseValue() == NFRule::kImproperFractionRule 817 || rule->getBaseValue() == NFRule::kProperFractionRule 818 || rule->getBaseValue() == NFRule::kDefaultRule) 819 { 820 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 821 NFRule *fractionRule = fractionRules[fIdx]; 822 if (fractionRule->getBaseValue() == rule->getBaseValue()) { 823 fractionRule->_appendRuleText(result); 824 result.append(gLineFeed); 825 } 826 } 827 } 828 else { 829 rule->_appendRuleText(result); 830 result.append(gLineFeed); 831 } 832 } 833 } 834 } 835 836 // utility functions 837 838 int64_t util64_fromDouble(double d) { 839 int64_t result = 0; 840 if (!uprv_isNaN(d)) { 841 double mant = uprv_maxMantissa(); 842 if (d < -mant) { 843 d = -mant; 844 } else if (d > mant) { 845 d = mant; 846 } 847 UBool neg = d < 0; 848 if (neg) { 849 d = -d; 850 } 851 result = static_cast<int64_t>(uprv_floor(d)); 852 if (neg) { 853 result = -result; 854 } 855 } 856 return result; 857 } 858 859 uint64_t util64_pow(uint32_t base, uint16_t exponent) { 860 if (base == 0) { 861 return 0; 862 } 863 uint64_t result = 1; 864 uint64_t pow = base; 865 while (true) { 866 if ((exponent & 1) == 1) { 867 result *= pow; 868 } 869 exponent >>= 1; 870 if (exponent == 0) { 871 break; 872 } 873 pow *= pow; 874 } 875 return result; 876 } 877 878 static const uint8_t asciiDigits[] = { 879 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 880 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 881 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 882 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 883 0x77u, 0x78u, 0x79u, 0x7au, 884 }; 885 886 static const char16_t kUMinus = static_cast<char16_t>(0x002d); 887 888 #ifdef RBNF_DEBUG 889 static const char kMinus = '-'; 890 891 static const uint8_t digitInfo[] = { 892 0, 0, 0, 0, 0, 0, 0, 0, 893 0, 0, 0, 0, 0, 0, 0, 0, 894 0, 0, 0, 0, 0, 0, 0, 0, 895 0, 0, 0, 0, 0, 0, 0, 0, 896 0, 0, 0, 0, 0, 0, 0, 0, 897 0, 0, 0, 0, 0, 0, 0, 0, 898 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 899 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 900 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 901 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 902 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 903 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 904 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 905 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 906 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 907 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 908 }; 909 910 int64_t util64_atoi(const char* str, uint32_t radix) 911 { 912 if (radix > 36) { 913 radix = 36; 914 } else if (radix < 2) { 915 radix = 2; 916 } 917 int64_t lradix = radix; 918 919 int neg = 0; 920 if (*str == kMinus) { 921 ++str; 922 neg = 1; 923 } 924 int64_t result = 0; 925 uint8_t b; 926 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 927 result *= lradix; 928 result += (int32_t)b; 929 } 930 if (neg) { 931 result = -result; 932 } 933 return result; 934 } 935 936 int64_t util64_utoi(const char16_t* str, uint32_t radix) 937 { 938 if (radix > 36) { 939 radix = 36; 940 } else if (radix < 2) { 941 radix = 2; 942 } 943 int64_t lradix = radix; 944 945 int neg = 0; 946 if (*str == kUMinus) { 947 ++str; 948 neg = 1; 949 } 950 int64_t result = 0; 951 char16_t c; 952 uint8_t b; 953 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 954 result *= lradix; 955 result += (int32_t)b; 956 } 957 if (neg) { 958 result = -result; 959 } 960 return result; 961 } 962 963 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 964 { 965 if (radix > 36) { 966 radix = 36; 967 } else if (radix < 2) { 968 radix = 2; 969 } 970 int64_t base = radix; 971 972 char* p = buf; 973 if (len && (w < 0) && (radix == 10) && !raw) { 974 w = -w; 975 *p++ = kMinus; 976 --len; 977 } else if (len && (w == 0)) { 978 *p++ = (char)raw ? 0 : asciiDigits[0]; 979 --len; 980 } 981 982 while (len && w != 0) { 983 int64_t n = w / base; 984 int64_t m = n * base; 985 int32_t d = (int32_t)(w-m); 986 *p++ = raw ? (char)d : asciiDigits[d]; 987 w = n; 988 --len; 989 } 990 if (len) { 991 *p = 0; // null terminate if room for caller convenience 992 } 993 994 len = p - buf; 995 if (*buf == kMinus) { 996 ++buf; 997 } 998 while (--p > buf) { 999 char c = *p; 1000 *p = *buf; 1001 *buf = c; 1002 ++buf; 1003 } 1004 1005 return len; 1006 } 1007 #endif 1008 1009 uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw) 1010 { 1011 if (radix > 36) { 1012 radix = 36; 1013 } else if (radix < 2) { 1014 radix = 2; 1015 } 1016 int64_t base = radix; 1017 1018 char16_t* p = buf; 1019 if (len && (w < 0) && (radix == 10) && !raw) { 1020 w = -w; 1021 *p++ = kUMinus; 1022 --len; 1023 } else if (len && (w == 0)) { 1024 *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0]; 1025 --len; 1026 } 1027 1028 while (len && (w != 0)) { 1029 int64_t n = w / base; 1030 int64_t m = n * base; 1031 int32_t d = static_cast<int32_t>(w - m); 1032 *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]); 1033 w = n; 1034 --len; 1035 } 1036 if (len) { 1037 *p = 0; // null terminate if room for caller convenience 1038 } 1039 1040 len = static_cast<uint32_t>(p - buf); 1041 if (*buf == kUMinus) { 1042 ++buf; 1043 } 1044 while (--p > buf) { 1045 char16_t c = *p; 1046 *p = *buf; 1047 *buf = c; 1048 ++buf; 1049 } 1050 1051 return len; 1052 } 1053 1054 1055 U_NAMESPACE_END 1056 1057 /* U_HAVE_RBNF */ 1058 #endif