[ tor-browser ].git.dasho

nfrs.cpp (33298B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *   Copyright (C) 1997-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ******************************************************************************
      8 *   file name:  nfrs.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 * Modification history
     14 * Date        Name      Comments
     15 * 10/11/2001  Doug      Ported from ICU4J
     16 */
     17 
     18 #include "nfrs.h"
     19 
     20 #if U_HAVE_RBNF
     21 
     22 #include "unicode/uchar.h"
     23 #include "nfrule.h"
     24 #include "nfrlist.h"
     25 #include "patternprops.h"
     26 #include "putilimp.h"
     27 
     28 #ifdef RBNF_DEBUG
     29 #include "cmemory.h"
     30 #endif
     31 
     32 enum {
     33    /** -x */
     34    NEGATIVE_RULE_INDEX = 0,
     35    /** x.x */
     36    IMPROPER_FRACTION_RULE_INDEX = 1,
     37    /** 0.x */
     38    PROPER_FRACTION_RULE_INDEX = 2,
     39    /** x.0 */
     40    DEFAULT_RULE_INDEX = 3,
     41    /** Inf */
     42    INFINITY_RULE_INDEX = 4,
     43    /** NaN */
     44    NAN_RULE_INDEX = 5,
     45    NON_NUMERICAL_RULE_LENGTH = 6
     46 };
     47 
     48 U_NAMESPACE_BEGIN
     49 
     50 #if 0
     51 // euclid's algorithm works with doubles
     52 // note, doubles only get us up to one quadrillion or so, which
     53 // isn't as much range as we get with longs.  We probably still
     54 // want either 64-bit math, or BigInteger.
     55 
     56 static int64_t
     57 util_lcm(int64_t x, int64_t y)
     58 {
     59    x.abs();
     60    y.abs();
     61 
     62    if (x == 0 || y == 0) {
     63        return 0;
     64    } else {
     65        do {
     66            if (x < y) {
     67                int64_t t = x; x = y; y = t;
     68            }
     69            x -= y * (x/y);
     70        } while (x != 0);
     71 
     72        return y;
     73    }
     74 }
     75 
     76 #else
     77 /**
     78 * Calculates the least common multiple of x and y.
     79 */
     80 static int64_t
     81 util_lcm(int64_t x, int64_t y)
     82 {
     83    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
     84    // vol. 2, 1st ed., pp. 298-299
     85    int64_t x1 = x;
     86    int64_t y1 = y;
     87 
     88    int p2 = 0;
     89    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
     90        ++p2;
     91        x1 >>= 1;
     92        y1 >>= 1;
     93    }
     94 
     95    int64_t t;
     96    if ((x1 & 1) == 1) {
     97        t = -y1;
     98    } else {
     99        t = x1;
    100    }
    101 
    102    while (t != 0) {
    103        while ((t & 1) == 0) {
    104            t = t >> 1;
    105        }
    106        if (t > 0) {
    107            x1 = t;
    108        } else {
    109            y1 = -t;
    110        }
    111        t = x1 - y1;
    112    }
    113 
    114    int64_t gcd = x1 << p2;
    115 
    116    // x * y == gcd(x, y) * lcm(x, y)
    117    return x / gcd * y;
    118 }
    119 #endif
    120 
    121 static const char16_t gPercent = 0x0025;
    122 static const char16_t gColon = 0x003a;
    123 static const char16_t gSemicolon = 0x003b;
    124 static const char16_t gLineFeed = 0x000a;
    125 
    126 static const char16_t gPercentPercent[] =
    127 {
    128    0x25, 0x25, 0
    129 }; /* "%%" */
    130 
    131 static const char16_t gNoparse[] =
    132 {
    133    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
    134 }; /* "@noparse" */
    135 
    136 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
    137  : name()
    138  , rules(0)
    139  , owner(_owner)
    140  , fractionRules()
    141  , fIsFractionRuleSet(false)
    142  , fIsPublic(false)
    143  , fIsParseable(true)
    144 {
    145    for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    146        nonNumericalRules[i] = nullptr;
    147    }
    148 
    149    if (U_FAILURE(status)) {
    150        return;
    151    }
    152 
    153    UnicodeString& description = descriptions[index]; // !!! make sure index is valid
    154 
    155    if (description.isEmpty()) {
    156        // throw new IllegalArgumentException("Empty rule set description");
    157        status = U_PARSE_ERROR;
    158        return;
    159    }
    160 
    161    // if the description begins with a rule set name (the rule set
    162    // name can be omitted in formatter descriptions that consist
    163    // of only one rule set), copy it out into our "name" member
    164    // and delete it from the description
    165    if (description.charAt(0) == gPercent) {
    166        int32_t pos = description.indexOf(gColon);
    167        // if there are no name or the name is "%".
    168        if (pos < 2) {
    169            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
    170            status = U_PARSE_ERROR;
    171            return;
    172        } else {
    173            name.setTo(description, 0, pos);
    174            while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
    175            }
    176            description.remove(0, pos);
    177        }
    178    } else {
    179        name.setTo(UNICODE_STRING_SIMPLE("%default"));
    180    }
    181 
    182    if (description.isEmpty()) {
    183        // throw new IllegalArgumentException("Empty rule set description");
    184        status = U_PARSE_ERROR;
    185        return;
    186    }
    187 
    188    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
    189 
    190    if (name.endsWith(gNoparse, 8)) {
    191        fIsParseable = false;
    192        name.truncate(name.length() - 8); // remove the @noparse from the name
    193    }
    194 
    195    // all of the other members of NFRuleSet are initialized
    196    // by parseRules()
    197 }
    198 
    199 void
    200 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
    201 {
    202    // start by creating a Vector whose elements are Strings containing
    203    // the descriptions of the rules (one rule per element).  The rules
    204    // are separated by semicolons (there's no escape facility: ALL
    205    // semicolons are rule delimiters)
    206 
    207    if (U_FAILURE(status)) {
    208        return;
    209    }
    210 
    211    // ensure we are starting with an empty rule list
    212    rules.deleteAll();
    213 
    214    // dlf - the original code kept a separate description array for no reason,
    215    // so I got rid of it.  The loop was too complex so I simplified it.
    216 
    217    UnicodeString currentDescription;
    218    int32_t oldP = 0;
    219    while (oldP < description.length()) {
    220        int32_t p = description.indexOf(gSemicolon, oldP);
    221        if (p == -1) {
    222            p = description.length();
    223        }
    224        currentDescription.setTo(description, oldP, p - oldP);
    225        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
    226        if (U_FAILURE(status)) {
    227            return;
    228        }
    229        oldP = p + 1;
    230    }
    231 
    232    // for rules that didn't specify a base value, their base values
    233    // were initialized to 0.  Make another pass through the list and
    234    // set all those rules' base values.  We also remove any special
    235    // rules from the list and put them into their own member variables
    236    int64_t defaultBaseValue = 0;
    237 
    238    // (this isn't a for loop because we might be deleting items from
    239    // the vector-- we want to make sure we only increment i when
    240    // we _didn't_ delete anything from the vector)
    241    int32_t rulesSize = rules.size();
    242    for (int32_t i = 0; i < rulesSize; i++) {
    243        NFRule* rule = rules[i];
    244        int64_t baseValue = rule->getBaseValue();
    245 
    246        if (baseValue == 0) {
    247            // if the rule's base value is 0, fill in a default
    248            // base value (this will be 1 plus the preceding
    249            // rule's base value for regular rule sets, and the
    250            // same as the preceding rule's base value in fraction
    251            // rule sets)
    252            rule->setBaseValue(defaultBaseValue, status);
    253            if (U_FAILURE(status)) {
    254                return;
    255            }
    256        }
    257        else {
    258            // if it's a regular rule that already knows its base value,
    259            // check to make sure the rules are in order, and update
    260            // the default base value for the next rule
    261            if (baseValue < defaultBaseValue) {
    262                // throw new IllegalArgumentException("Rules are not in order");
    263                status = U_PARSE_ERROR;
    264                return;
    265            }
    266            defaultBaseValue = baseValue;
    267        }
    268        if (!fIsFractionRuleSet) {
    269            ++defaultBaseValue;
    270        }
    271    }
    272 }
    273 
    274 /**
    275 * Set one of the non-numerical rules.
    276 * @param rule The rule to set.
    277 */
    278 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
    279    switch (rule->getBaseValue()) {
    280        case NFRule::kNegativeNumberRule:
    281            delete nonNumericalRules[NEGATIVE_RULE_INDEX];
    282            nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
    283            return;
    284        case NFRule::kImproperFractionRule:
    285            setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
    286            return;
    287        case NFRule::kProperFractionRule:
    288            setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
    289            return;
    290        case NFRule::kDefaultRule:
    291            setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
    292            return;
    293        case NFRule::kInfinityRule:
    294            delete nonNumericalRules[INFINITY_RULE_INDEX];
    295            nonNumericalRules[INFINITY_RULE_INDEX] = rule;
    296            return;
    297        case NFRule::kNaNRule:
    298            delete nonNumericalRules[NAN_RULE_INDEX];
    299            nonNumericalRules[NAN_RULE_INDEX] = rule;
    300            return;
    301        case NFRule::kNoBase:
    302        case NFRule::kOtherRule:
    303        default:
    304            // If we do not remember the rule inside the object.
    305            // delete it here to prevent memory leak.
    306            delete rule;
    307            return;
    308    }
    309 }
    310 
    311 /**
    312 * Determine the best fraction rule to use. Rules matching the decimal point from
    313 * DecimalFormatSymbols become the main set of rules to use.
    314 * @param originalIndex The index into nonNumericalRules
    315 * @param newRule The new rule to consider
    316 * @param rememberRule Should the new rule be added to fractionRules.
    317 */
    318 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
    319    if (rememberRule) {
    320        fractionRules.add(newRule);
    321    }
    322    NFRule *bestResult = nonNumericalRules[originalIndex];
    323    if (bestResult == nullptr) {
    324        nonNumericalRules[originalIndex] = newRule;
    325    }
    326    else {
    327        // We have more than one. Which one is better?
    328        const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
    329        if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
    330            == newRule->getDecimalPoint())
    331        {
    332            nonNumericalRules[originalIndex] = newRule;
    333        }
    334        // else leave it alone
    335    }
    336 }
    337 
    338 NFRuleSet::~NFRuleSet()
    339 {
    340    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    341        if (i != IMPROPER_FRACTION_RULE_INDEX
    342            && i != PROPER_FRACTION_RULE_INDEX
    343            && i != DEFAULT_RULE_INDEX)
    344        {
    345            delete nonNumericalRules[i];
    346        }
    347        // else it will be deleted via NFRuleList fractionRules
    348    }
    349 }
    350 
    351 static UBool
    352 util_equalRules(const NFRule* rule1, const NFRule* rule2)
    353 {
    354    if (rule1) {
    355        if (rule2) {
    356            return *rule1 == *rule2;
    357        }
    358    } else if (!rule2) {
    359        return true;
    360    }
    361    return false;
    362 }
    363 
    364 bool
    365 NFRuleSet::operator==(const NFRuleSet& rhs) const
    366 {
    367    if (rules.size() == rhs.rules.size() &&
    368        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
    369        name == rhs.name) {
    370 
    371        // ...then compare the non-numerical rule lists...
    372        for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    373            if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
    374                return false;
    375            }
    376        }
    377 
    378        // ...then compare the rule lists...
    379        for (uint32_t i = 0; i < rules.size(); ++i) {
    380            if (*rules[i] != *rhs.rules[i]) {
    381                return false;
    382            }
    383        }
    384        return true;
    385    }
    386    return false;
    387 }
    388 
    389 void
    390 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
    391    for (uint32_t i = 0; i < rules.size(); ++i) {
    392        rules[i]->setDecimalFormatSymbols(newSymbols, status);
    393    }
    394    // Switch the fraction rules to mirror the DecimalFormatSymbols.
    395    for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
    396        if (nonNumericalRules[nonNumericalIdx]) {
    397            for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    398                NFRule *fractionRule = fractionRules[fIdx];
    399                if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
    400                    setBestFractionRule(nonNumericalIdx, fractionRule, false);
    401                }
    402            }
    403        }
    404    }
    405 
    406    for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
    407        NFRule *rule = nonNumericalRules[nnrIdx];
    408        if (rule) {
    409            rule->setDecimalFormatSymbols(newSymbols, status);
    410        }
    411    }
    412 }
    413 
    414 #define RECURSION_LIMIT 64
    415 
    416 void
    417 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    418 {
    419    if (recursionCount >= RECURSION_LIMIT) {
    420        // stop recursion
    421        status = U_INVALID_STATE_ERROR;
    422        return;
    423    }
    424    const NFRule *rule = findNormalRule(number);
    425    if (rule) { // else error, but can't report it
    426        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    427    }
    428 }
    429 
    430 void
    431 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    432 {
    433    if (recursionCount >= RECURSION_LIMIT) {
    434        // stop recursion
    435        status = U_INVALID_STATE_ERROR;
    436        return;
    437    }
    438    const NFRule *rule = findDoubleRule(number);
    439    if (rule) { // else error, but can't report it
    440        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    441    }
    442 }
    443 
    444 const NFRule*
    445 NFRuleSet::findDoubleRule(double number) const
    446 {
    447    // if this is a fraction rule set, use findFractionRuleSetRule()
    448    if (isFractionRuleSet()) {
    449        return findFractionRuleSetRule(number);
    450    }
    451 
    452    if (uprv_isNaN(number)) {
    453        const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
    454        if (!rule) {
    455            rule = owner->getDefaultNaNRule();
    456        }
    457        return rule;
    458    }
    459 
    460    // if the number is negative, return the negative number rule
    461    // (if there isn't a negative-number rule, we pretend it's a
    462    // positive number)
    463    if (number < 0) {
    464        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    465            return  nonNumericalRules[NEGATIVE_RULE_INDEX];
    466        } else {
    467            number = -number;
    468        }
    469    }
    470 
    471    if (uprv_isInfinite(number)) {
    472        const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
    473        if (!rule) {
    474            rule = owner->getDefaultInfinityRule();
    475        }
    476        return rule;
    477    }
    478 
    479    // if the number isn't an integer, we use one of the fraction rules...
    480    if (number != uprv_floor(number)) {
    481        // if the number is between 0 and 1, return the proper
    482        // fraction rule
    483        if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
    484            return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
    485        }
    486        // otherwise, return the improper fraction rule
    487        else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
    488            return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
    489        }
    490    }
    491 
    492    // if there's a default rule, use it to format the number
    493    if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
    494        return nonNumericalRules[DEFAULT_RULE_INDEX];
    495    }
    496 
    497    // and if we haven't yet returned a rule, use findNormalRule()
    498    // to find the applicable rule
    499    int64_t r = util64_fromDouble(number + 0.5);
    500    return findNormalRule(r);
    501 }
    502 
    503 const NFRule *
    504 NFRuleSet::findNormalRule(int64_t number) const
    505 {
    506    // if this is a fraction rule set, use findFractionRuleSetRule()
    507    // to find the rule (we should only go into this clause if the
    508    // value is 0)
    509    if (fIsFractionRuleSet) {
    510        return findFractionRuleSetRule(static_cast<double>(number));
    511    }
    512 
    513    // if the number is negative, return the negative-number rule
    514    // (if there isn't one, pretend the number is positive)
    515    if (number < 0) {
    516        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    517            return nonNumericalRules[NEGATIVE_RULE_INDEX];
    518        } else {
    519            number = -number;
    520        }
    521    }
    522 
    523    // we have to repeat the preceding two checks, even though we
    524    // do them in findRule(), because the version of format() that
    525    // takes a long bypasses findRule() and goes straight to this
    526    // function.  This function does skip the fraction rules since
    527    // we know the value is an integer (it also skips the default
    528    // rule, since it's considered a fraction rule.  Skipping the
    529    // default rule in this function is also how we avoid infinite
    530    // recursion)
    531 
    532    // {dlf} unfortunately this fails if there are no rules except
    533    // special rules.  If there are no rules, use the default rule.
    534 
    535    // binary-search the rule list for the applicable rule
    536    // (a rule is used for all values from its base value to
    537    // the next rule's base value)
    538    int32_t hi = rules.size();
    539    if (hi > 0) {
    540        int32_t lo = 0;
    541 
    542        while (lo < hi) {
    543            int32_t mid = (lo + hi) / 2;
    544            if (rules[mid]->getBaseValue() == number) {
    545                return rules[mid];
    546            }
    547            else if (rules[mid]->getBaseValue() > number) {
    548                hi = mid;
    549            }
    550            else {
    551                lo = mid + 1;
    552            }
    553        }
    554        if (hi == 0) { // bad rule set, minimum base > 0
    555            return nullptr; // want to throw exception here
    556        }
    557 
    558        NFRule *result = rules[hi - 1];
    559 
    560        // use shouldRollBack() to see whether we need to invoke the
    561        // rollback rule (see shouldRollBack()'s documentation for
    562        // an explanation of the rollback rule).  If we do, roll back
    563        // one rule and return that one instead of the one we'd normally
    564        // return
    565        if (result->shouldRollBack(number)) {
    566            if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
    567                return nullptr;
    568            }
    569            result = rules[hi - 2];
    570        }
    571        return result;
    572    }
    573    // else use the default rule
    574    return nonNumericalRules[DEFAULT_RULE_INDEX];
    575 }
    576 
    577 /**
    578 * If this rule is a fraction rule set, this function is used by
    579 * findRule() to select the most appropriate rule for formatting
    580 * the number.  Basically, the base value of each rule in the rule
    581 * set is treated as the denominator of a fraction.  Whichever
    582 * denominator can produce the fraction closest in value to the
    583 * number passed in is the result.  If there's a tie, the earlier
    584 * one in the list wins.  (If there are two rules in a row with the
    585 * same base value, the first one is used when the numerator of the
    586 * fraction would be 1, and the second rule is used the rest of the
    587 * time.
    588 * @param number The number being formatted (which will always be
    589 * a number between 0 and 1)
    590 * @return The rule to use to format this number
    591 */
    592 const NFRule*
    593 NFRuleSet::findFractionRuleSetRule(double number) const
    594 {
    595    // the obvious way to do this (multiply the value being formatted
    596    // by each rule's base value until you get an integral result)
    597    // doesn't work because of rounding error.  This method is more
    598    // accurate
    599 
    600    // find the least common multiple of the rules' base values
    601    // and multiply this by the number being formatted.  This is
    602    // all the precision we need, and we can do all of the rest
    603    // of the math using integer arithmetic
    604    int64_t leastCommonMultiple = rules[0]->getBaseValue();
    605    int64_t numerator;
    606    {
    607        for (uint32_t i = 1; i < rules.size(); ++i) {
    608            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
    609        }
    610        numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5);
    611    }
    612    // for each rule, do the following...
    613    int64_t tempDifference;
    614    int64_t difference = util64_fromDouble(uprv_maxMantissa());
    615    int32_t winner = 0;
    616    for (uint32_t i = 0; i < rules.size(); ++i) {
    617        // "numerator" is the numerator of the fraction if the
    618        // denominator is the LCD.  The numerator if the rule's
    619        // base value is the denominator is "numerator" times the
    620        // base value divided bythe LCD.  Here we check to see if
    621        // that's an integer, and if not, how close it is to being
    622        // an integer.
    623        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
    624 
    625 
    626        // normalize the result of the above calculation: we want
    627        // the numerator's distance from the CLOSEST multiple
    628        // of the LCD
    629        if (leastCommonMultiple - tempDifference < tempDifference) {
    630            tempDifference = leastCommonMultiple - tempDifference;
    631        }
    632 
    633        // if this is as close as we've come, keep track of how close
    634        // that is, and the line number of the rule that did it.  If
    635        // we've scored a direct hit, we don't have to look at any more
    636        // rules
    637        if (tempDifference < difference) {
    638            difference = tempDifference;
    639            winner = i;
    640            if (difference == 0) {
    641                break;
    642            }
    643        }
    644    }
    645 
    646    // if we have two successive rules that both have the winning base
    647    // value, then the first one (the one we found above) is used if
    648    // the numerator of the fraction is 1 and the second one is used if
    649    // the numerator of the fraction is anything else (this lets us
    650    // do things like "one third"/"two thirds" without having to define
    651    // a whole bunch of extra rule sets)
    652    if (static_cast<unsigned>(winner + 1) < rules.size() &&
    653        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
    654        double n = static_cast<double>(rules[winner]->getBaseValue()) * number;
    655        if (n < 0.5 || n >= 2) {
    656            ++winner;
    657        }
    658    }
    659 
    660    // finally, return the winning rule
    661    return rules[winner];
    662 }
    663 
    664 /**
    665 * Parses a string.  Matches the string to be parsed against each
    666 * of its rules (with a base value less than upperBound) and returns
    667 * the value produced by the rule that matched the most characters
    668 * in the source string.
    669 * @param text The string to parse
    670 * @param parsePosition The initial position is ignored and assumed
    671 * to be 0.  On exit, this object has been updated to point to the
    672 * first character position this rule set didn't consume.
    673 * @param upperBound Limits the rules that can be allowed to match.
    674 * Only rules whose base values are strictly less than upperBound
    675 * are considered.
    676 * @return The numerical result of parsing this string.  This will
    677 * be the matching rule's base value, composed appropriately with
    678 * the results of matching any of its substitutions.  The object
    679 * will be an instance of Long if it's an integral value; otherwise,
    680 * it will be an instance of Double.  This function always returns
    681 * a valid object: If nothing matched the input string at all,
    682 * this function returns new Long(0), and the parse position is
    683 * left unchanged.
    684 */
    685 #ifdef RBNF_DEBUG
    686 #include <stdio.h>
    687 
    688 static void dumpUS(FILE* f, const UnicodeString& us) {
    689  int len = us.length();
    690  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
    691  if (buf != nullptr) {
    692   us.extract(0, len, buf);
    693   buf[len] = 0;
    694   fprintf(f, "%s", buf);
    695   uprv_free(buf); //delete[] buf;
    696  }
    697 }
    698 #endif
    699 
    700 UBool
    701 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
    702 {
    703    // try matching each rule in the rule set against the text being
    704    // parsed.  Whichever one matches the most characters is the one
    705    // that determines the value we return.
    706 
    707    result.setLong(0);
    708 
    709    // dump out if we've reached the recursion limit
    710    if (recursionCount >= RECURSION_LIMIT) {
    711        // stop recursion
    712        return false;
    713    }
    714 
    715    // dump out if there's no text to parse
    716    if (text.length() == 0) {
    717        return 0;
    718    }
    719 
    720    ParsePosition highWaterMark;
    721    ParsePosition workingPos = pos;
    722 
    723 #ifdef RBNF_DEBUG
    724    fprintf(stderr, "<nfrs> %x '", this);
    725    dumpUS(stderr, name);
    726    fprintf(stderr, "' text '");
    727    dumpUS(stderr, text);
    728    fprintf(stderr, "'\n");
    729    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
    730 #endif
    731    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    732    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    733        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
    734            // Mark this rule as being executed so that we don't try to execute it again.
    735            nonNumericalExecutedRuleMask |= 1 << i;
    736 
    737            Formattable tempResult;
    738            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
    739            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
    740                result = tempResult;
    741                highWaterMark = workingPos;
    742            }
    743            workingPos = pos;
    744        }
    745    }
    746 #ifdef RBNF_DEBUG
    747    fprintf(stderr, "<nfrs> continue other with text '");
    748    dumpUS(stderr, text);
    749    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    750 #endif
    751 
    752    // finally, go through the regular rules one at a time.  We start
    753    // at the end of the list because we want to try matching the most
    754    // sigificant rule first (this helps ensure that we parse
    755    // "five thousand three hundred six" as
    756    // "(five thousand) (three hundred) (six)" rather than
    757    // "((five thousand three) hundred) (six)").  Skip rules whose
    758    // base values are higher than the upper bound (again, this helps
    759    // limit ambiguity by making sure the rules that match a rule's
    760    // are less significant than the rule containing the substitutions)/
    761    {
    762        int64_t ub = util64_fromDouble(upperBound);
    763 #ifdef RBNF_DEBUG
    764        {
    765            char ubstr[64];
    766            util64_toa(ub, ubstr, 64);
    767            char ubstrhex[64];
    768            util64_toa(ub, ubstrhex, 64, 16);
    769            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
    770        }
    771 #endif
    772        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
    773            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
    774                continue;
    775            }
    776            Formattable tempResult;
    777            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
    778            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
    779                result = tempResult;
    780                highWaterMark = workingPos;
    781            }
    782            workingPos = pos;
    783        }
    784    }
    785 #ifdef RBNF_DEBUG
    786    fprintf(stderr, "<nfrs> exit\n");
    787 #endif
    788    // finally, update the parse position we were passed to point to the
    789    // first character we didn't use, and return the result that
    790    // corresponds to that string of characters
    791    pos = highWaterMark;
    792 
    793    return 1;
    794 }
    795 
    796 void
    797 NFRuleSet::appendRules(UnicodeString& result) const
    798 {
    799    uint32_t i;
    800 
    801    // the rule set name goes first...
    802    result.append(name);
    803    result.append(gColon);
    804    result.append(gLineFeed);
    805 
    806    // followed by the regular rules...
    807    for (i = 0; i < rules.size(); i++) {
    808        rules[i]->_appendRuleText(result);
    809        result.append(gLineFeed);
    810    }
    811 
    812    // followed by the special rules (if they exist)
    813    for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    814        NFRule *rule = nonNumericalRules[i];
    815        if (nonNumericalRules[i]) {
    816            if (rule->getBaseValue() == NFRule::kImproperFractionRule
    817                || rule->getBaseValue() == NFRule::kProperFractionRule
    818                || rule->getBaseValue() == NFRule::kDefaultRule)
    819            {
    820                for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    821                    NFRule *fractionRule = fractionRules[fIdx];
    822                    if (fractionRule->getBaseValue() == rule->getBaseValue()) {
    823                        fractionRule->_appendRuleText(result);
    824                        result.append(gLineFeed);
    825                    }
    826                }
    827            }
    828            else {
    829                rule->_appendRuleText(result);
    830                result.append(gLineFeed);
    831            }
    832        }
    833    }
    834 }
    835 
    836 // utility functions
    837 
    838 int64_t util64_fromDouble(double d) {
    839    int64_t result = 0;
    840    if (!uprv_isNaN(d)) {
    841        double mant = uprv_maxMantissa();
    842        if (d < -mant) {
    843            d = -mant;
    844        } else if (d > mant) {
    845            d = mant;
    846        }
    847        UBool neg = d < 0; 
    848        if (neg) {
    849            d = -d;
    850        }
    851        result = static_cast<int64_t>(uprv_floor(d));
    852        if (neg) {
    853            result = -result;
    854        }
    855    }
    856    return result;
    857 }
    858 
    859 uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
    860    if (base == 0) {
    861        return 0;
    862    }
    863    uint64_t result = 1;
    864    uint64_t pow = base;
    865    while (true) {
    866        if ((exponent & 1) == 1) {
    867            result *= pow;
    868        }
    869        exponent >>= 1;
    870        if (exponent == 0) {
    871            break;
    872        }
    873        pow *= pow;
    874    }
    875    return result;
    876 }
    877 
    878 static const uint8_t asciiDigits[] = { 
    879    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    880    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    881    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    882    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    883    0x77u, 0x78u, 0x79u, 0x7au,  
    884 };
    885 
    886 static const char16_t kUMinus = static_cast<char16_t>(0x002d);
    887 
    888 #ifdef RBNF_DEBUG
    889 static const char kMinus = '-';
    890 
    891 static const uint8_t digitInfo[] = {
    892        0,     0,     0,     0,     0,     0,     0,     0,
    893        0,     0,     0,     0,     0,     0,     0,     0,
    894        0,     0,     0,     0,     0,     0,     0,     0,
    895        0,     0,     0,     0,     0,     0,     0,     0,
    896        0,     0,     0,     0,     0,     0,     0,     0,
    897        0,     0,     0,     0,     0,     0,     0,     0,
    898    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    899    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
    900        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    901    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    902    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    903    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    904        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    905    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    906    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    907    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    908 };
    909 
    910 int64_t util64_atoi(const char* str, uint32_t radix)
    911 {
    912    if (radix > 36) {
    913        radix = 36;
    914    } else if (radix < 2) {
    915        radix = 2;
    916    }
    917    int64_t lradix = radix;
    918 
    919    int neg = 0;
    920    if (*str == kMinus) {
    921        ++str;
    922        neg = 1;
    923    }
    924    int64_t result = 0;
    925    uint8_t b;
    926    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
    927        result *= lradix;
    928        result += (int32_t)b;
    929    }
    930    if (neg) {
    931        result = -result;
    932    }
    933    return result;
    934 }
    935 
    936 int64_t util64_utoi(const char16_t* str, uint32_t radix)
    937 {
    938    if (radix > 36) {
    939        radix = 36;
    940    } else if (radix < 2) {
    941        radix = 2;
    942    }
    943    int64_t lradix = radix;
    944 
    945    int neg = 0;
    946    if (*str == kUMinus) {
    947        ++str;
    948        neg = 1;
    949    }
    950    int64_t result = 0;
    951    char16_t c;
    952    uint8_t b;
    953    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
    954        result *= lradix;
    955        result += (int32_t)b;
    956    }
    957    if (neg) {
    958        result = -result;
    959    }
    960    return result;
    961 }
    962 
    963 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
    964 {    
    965    if (radix > 36) {
    966        radix = 36;
    967    } else if (radix < 2) {
    968        radix = 2;
    969    }
    970    int64_t base = radix;
    971 
    972    char* p = buf;
    973    if (len && (w < 0) && (radix == 10) && !raw) {
    974        w = -w;
    975        *p++ = kMinus;
    976        --len;
    977    } else if (len && (w == 0)) {
    978        *p++ = (char)raw ? 0 : asciiDigits[0];
    979        --len;
    980    }
    981 
    982    while (len && w != 0) {
    983        int64_t n = w / base;
    984        int64_t m = n * base;
    985        int32_t d = (int32_t)(w-m);
    986        *p++ = raw ? (char)d : asciiDigits[d];
    987        w = n;
    988        --len;
    989    }
    990    if (len) {
    991        *p = 0; // null terminate if room for caller convenience
    992    }
    993 
    994    len = p - buf;
    995    if (*buf == kMinus) {
    996        ++buf;
    997    }
    998    while (--p > buf) {
    999        char c = *p;
   1000        *p = *buf;
   1001        *buf = c;
   1002        ++buf;
   1003    }
   1004 
   1005    return len;
   1006 }
   1007 #endif
   1008 
   1009 uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
   1010 {    
   1011    if (radix > 36) {
   1012        radix = 36;
   1013    } else if (radix < 2) {
   1014        radix = 2;
   1015    }
   1016    int64_t base = radix;
   1017 
   1018    char16_t* p = buf;
   1019    if (len && (w < 0) && (radix == 10) && !raw) {
   1020        w = -w;
   1021        *p++ = kUMinus;
   1022        --len;
   1023    } else if (len && (w == 0)) {
   1024        *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0];
   1025        --len;
   1026    }
   1027 
   1028    while (len && (w != 0)) {
   1029        int64_t n = w / base;
   1030        int64_t m = n * base;
   1031        int32_t d = static_cast<int32_t>(w - m);
   1032        *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]);
   1033        w = n;
   1034        --len;
   1035    }
   1036    if (len) {
   1037        *p = 0; // null terminate if room for caller convenience
   1038    }
   1039 
   1040    len = static_cast<uint32_t>(p - buf);
   1041    if (*buf == kUMinus) {
   1042        ++buf;
   1043    }
   1044    while (--p > buf) {
   1045        char16_t c = *p;
   1046        *p = *buf;
   1047        *buf = c;
   1048        ++buf;
   1049    }
   1050 
   1051    return len;
   1052 }
   1053 
   1054 
   1055 U_NAMESPACE_END
   1056 
   1057 /* U_HAVE_RBNF */
   1058 #endif
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE