tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

plurfmt.cpp (20659B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2009-2015, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 *
      9 * File PLURFMT.CPP
     10 *******************************************************************************
     11 */
     12 
     13 #include "unicode/decimfmt.h"
     14 #include "unicode/messagepattern.h"
     15 #include "unicode/plurfmt.h"
     16 #include "unicode/plurrule.h"
     17 #include "unicode/utypes.h"
     18 #include "cmemory.h"
     19 #include "messageimpl.h"
     20 #include "nfrule.h"
     21 #include "plurrule_impl.h"
     22 #include "uassert.h"
     23 #include "uhash.h"
     24 #include "number_decimalquantity.h"
     25 #include "number_utils.h"
     26 #include "number_utypes.h"
     27 
     28 #if !UCONFIG_NO_FORMATTING
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 using number::impl::DecimalQuantity;
     33 
     34 static const char16_t OTHER_STRING[] = {
     35    0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     36 };
     37 
     38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     39 
     40 PluralFormat::PluralFormat(UErrorCode& status)
     41        : locale(Locale::getDefault()),
     42          msgPattern(status),
     43          numberFormat(nullptr),
     44          offset(0) {
     45    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
     46 }
     47 
     48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     49        : locale(loc),
     50          msgPattern(status),
     51          numberFormat(nullptr),
     52          offset(0) {
     53    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
     54 }
     55 
     56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     57        : locale(Locale::getDefault()),
     58          msgPattern(status),
     59          numberFormat(nullptr),
     60          offset(0) {
     61    init(&rules, UPLURAL_TYPE_COUNT, status);
     62 }
     63 
     64 PluralFormat::PluralFormat(const Locale& loc,
     65                           const PluralRules& rules,
     66                           UErrorCode& status)
     67        : locale(loc),
     68          msgPattern(status),
     69          numberFormat(nullptr),
     70          offset(0) {
     71    init(&rules, UPLURAL_TYPE_COUNT, status);
     72 }
     73 
     74 PluralFormat::PluralFormat(const Locale& loc,
     75                           UPluralType type,
     76                           UErrorCode& status)
     77        : locale(loc),
     78          msgPattern(status),
     79          numberFormat(nullptr),
     80          offset(0) {
     81    init(nullptr, type, status);
     82 }
     83 
     84 PluralFormat::PluralFormat(const UnicodeString& pat,
     85                           UErrorCode& status)
     86        : locale(Locale::getDefault()),
     87          msgPattern(status),
     88          numberFormat(nullptr),
     89          offset(0) {
     90    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
     91    applyPattern(pat, status);
     92 }
     93 
     94 PluralFormat::PluralFormat(const Locale& loc,
     95                           const UnicodeString& pat,
     96                           UErrorCode& status)
     97        : locale(loc),
     98          msgPattern(status),
     99          numberFormat(nullptr),
    100          offset(0) {
    101    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
    102    applyPattern(pat, status);
    103 }
    104 
    105 PluralFormat::PluralFormat(const PluralRules& rules,
    106                           const UnicodeString& pat,
    107                           UErrorCode& status)
    108        : locale(Locale::getDefault()),
    109          msgPattern(status),
    110          numberFormat(nullptr),
    111          offset(0) {
    112    init(&rules, UPLURAL_TYPE_COUNT, status);
    113    applyPattern(pat, status);
    114 }
    115 
    116 PluralFormat::PluralFormat(const Locale& loc,
    117                           const PluralRules& rules,
    118                           const UnicodeString& pat,
    119                           UErrorCode& status)
    120        : locale(loc),
    121          msgPattern(status),
    122          numberFormat(nullptr),
    123          offset(0) {
    124    init(&rules, UPLURAL_TYPE_COUNT, status);
    125    applyPattern(pat, status);
    126 }
    127 
    128 PluralFormat::PluralFormat(const Locale& loc,
    129                           UPluralType type,
    130                           const UnicodeString& pat,
    131                           UErrorCode& status)
    132        : locale(loc),
    133          msgPattern(status),
    134          numberFormat(nullptr),
    135          offset(0) {
    136    init(nullptr, type, status);
    137    applyPattern(pat, status);
    138 }
    139 
    140 PluralFormat::PluralFormat(const PluralFormat& other)
    141        : Format(other),
    142          locale(other.locale),
    143          msgPattern(other.msgPattern),
    144          numberFormat(nullptr),
    145          offset(other.offset) {
    146    copyObjects(other);
    147 }
    148 
    149 void
    150 PluralFormat::copyObjects(const PluralFormat& other) {
    151    UErrorCode status = U_ZERO_ERROR;
    152    delete numberFormat;
    153    delete pluralRulesWrapper.pluralRules;
    154    if (other.numberFormat == nullptr) {
    155        numberFormat = NumberFormat::createInstance(locale, status);
    156    } else {
    157        numberFormat = other.numberFormat->clone();
    158    }
    159    if (other.pluralRulesWrapper.pluralRules == nullptr) {
    160        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    161    } else {
    162        pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    163    }
    164 }
    165 
    166 
    167 PluralFormat::~PluralFormat() {
    168    delete numberFormat;
    169 }
    170 
    171 void
    172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
    173    if (U_FAILURE(status)) {
    174        return;
    175    }
    176 
    177    if (rules==nullptr) {
    178        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
    179    } else {
    180        pluralRulesWrapper.pluralRules = rules->clone();
    181        if (pluralRulesWrapper.pluralRules == nullptr) {
    182            status = U_MEMORY_ALLOCATION_ERROR;
    183            return;
    184        }
    185    }
    186 
    187    numberFormat= NumberFormat::createInstance(locale, status);
    188 }
    189 
    190 void
    191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    192    msgPattern.parsePluralStyle(newPattern, nullptr, status);
    193    if (U_FAILURE(status)) {
    194        msgPattern.clear();
    195        offset = 0;
    196        return;
    197    }
    198    offset = msgPattern.getPluralOffset(0);
    199 }
    200 
    201 UnicodeString&
    202 PluralFormat::format(const Formattable& obj,
    203                   UnicodeString& appendTo,
    204                   FieldPosition& pos,
    205                   UErrorCode& status) const
    206 {
    207    if (U_FAILURE(status)) return appendTo;
    208 
    209    if (obj.isNumeric()) {
    210        return format(obj, obj.getDouble(), appendTo, pos, status);
    211    } else {
    212        status = U_ILLEGAL_ARGUMENT_ERROR;
    213        return appendTo;
    214    }
    215 }
    216 
    217 UnicodeString
    218 PluralFormat::format(int32_t number, UErrorCode& status) const {
    219    FieldPosition fpos(FieldPosition::DONT_CARE);
    220    UnicodeString result;
    221    return format(Formattable(number), number, result, fpos, status);
    222 }
    223 
    224 UnicodeString
    225 PluralFormat::format(double number, UErrorCode& status) const {
    226    FieldPosition fpos(FieldPosition::DONT_CARE);
    227    UnicodeString result;
    228    return format(Formattable(number), number, result, fpos, status);
    229 }
    230 
    231 
    232 UnicodeString&
    233 PluralFormat::format(int32_t number,
    234                     UnicodeString& appendTo,
    235                     FieldPosition& pos,
    236                     UErrorCode& status) const {
    237    return format(Formattable(number), static_cast<double>(number), appendTo, pos, status);
    238 }
    239 
    240 UnicodeString&
    241 PluralFormat::format(double number,
    242                     UnicodeString& appendTo,
    243                     FieldPosition& pos,
    244                     UErrorCode& status) const {
    245    return format(Formattable(number), number, appendTo, pos, status);
    246 }
    247 
    248 UnicodeString&
    249 PluralFormat::format(const Formattable& numberObject, double number,
    250                     UnicodeString& appendTo,
    251                     FieldPosition& pos,
    252                     UErrorCode& status) const {
    253    if (U_FAILURE(status)) {
    254        return appendTo;
    255    }
    256    if (msgPattern.countParts() == 0) {
    257        return numberFormat->format(numberObject, appendTo, pos, status);
    258    }
    259 
    260    // Get the appropriate sub-message.
    261    // Select it based on the formatted number-offset.
    262    double numberMinusOffset = number - offset;
    263    // Call NumberFormatter to get both the DecimalQuantity and the string.
    264    // This call site needs to use more internal APIs than the Java equivalent.
    265    number::impl::UFormattedNumberData data;
    266    if (offset == 0) {
    267        // could be BigDecimal etc.
    268        numberObject.populateDecimalQuantity(data.quantity, status);
    269    } else {
    270        data.quantity.setToDouble(numberMinusOffset);
    271    }
    272    UnicodeString numberString;
    273    auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    274    if(decFmt != nullptr) {
    275        const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status);
    276        if (U_FAILURE(status)) {
    277            return appendTo;
    278        }
    279        lnf->formatImpl(&data, status); // mutates &data
    280        if (U_FAILURE(status)) {
    281            return appendTo;
    282        }
    283        numberString = data.getStringRef().toUnicodeString();
    284    } else {
    285        if (offset == 0) {
    286            numberFormat->format(numberObject, numberString, status);
    287        } else {
    288            numberFormat->format(numberMinusOffset, numberString, status);
    289        }
    290    }
    291 
    292    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
    293    if (U_FAILURE(status)) { return appendTo; }
    294    // Replace syntactic # signs in the top level of this sub-message
    295    // (not in nested arguments) with the formatted number-offset.
    296    const UnicodeString& pattern = msgPattern.getPatternString();
    297    int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    298    for (;;) {
    299        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    300        const UMessagePatternPartType type = part.getType();
    301        int32_t index = part.getIndex();
    302        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    303            return appendTo.append(pattern, prevIndex, index - prevIndex);
    304        } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    305            (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    306            appendTo.append(pattern, prevIndex, index - prevIndex);
    307            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    308                appendTo.append(numberString);
    309            }
    310            prevIndex = part.getLimit();
    311        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    312            appendTo.append(pattern, prevIndex, index - prevIndex);
    313            prevIndex = index;
    314            partIndex = msgPattern.getLimitPartIndex(partIndex);
    315            index = msgPattern.getPart(partIndex).getLimit();
    316            MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    317            prevIndex = index;
    318        }
    319    }
    320 }
    321 
    322 UnicodeString&
    323 PluralFormat::toPattern(UnicodeString& appendTo) {
    324    if (0 == msgPattern.countParts()) {
    325        appendTo.setToBogus();
    326    } else {
    327        appendTo.append(msgPattern.getPatternString());
    328    }
    329    return appendTo;
    330 }
    331 
    332 void
    333 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    334    if (U_FAILURE(status)) {
    335        return;
    336    }
    337    locale = loc;
    338    msgPattern.clear();
    339    delete numberFormat;
    340    offset = 0;
    341    numberFormat = nullptr;
    342    pluralRulesWrapper.reset();
    343    init(nullptr, UPLURAL_TYPE_CARDINAL, status);
    344 }
    345 
    346 void
    347 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    348    if (U_FAILURE(status)) {
    349        return;
    350    }
    351    NumberFormat* nf = format->clone();
    352    if (nf != nullptr) {
    353        delete numberFormat;
    354        numberFormat = nf;
    355    } else {
    356        status = U_MEMORY_ALLOCATION_ERROR;
    357    }
    358 }
    359 
    360 PluralFormat*
    361 PluralFormat::clone() const
    362 {
    363    return new PluralFormat(*this);
    364 }
    365 
    366 
    367 PluralFormat&
    368 PluralFormat::operator=(const PluralFormat& other) {
    369    if (this != &other) {
    370        locale = other.locale;
    371        msgPattern = other.msgPattern;
    372        offset = other.offset;
    373        copyObjects(other);
    374    }
    375 
    376    return *this;
    377 }
    378 
    379 bool
    380 PluralFormat::operator==(const Format& other) const {
    381    if (this == &other) {
    382        return true;
    383    }
    384    if (!Format::operator==(other)) {
    385        return false;
    386    }
    387    const PluralFormat& o = (const PluralFormat&)other;
    388    return
    389        locale == o.locale &&
    390        msgPattern == o.msgPattern &&  // implies same offset
    391        (numberFormat == nullptr) == (o.numberFormat == nullptr) &&
    392        (numberFormat == nullptr || *numberFormat == *o.numberFormat) &&
    393        (pluralRulesWrapper.pluralRules == nullptr) == (o.pluralRulesWrapper.pluralRules == nullptr) &&
    394        (pluralRulesWrapper.pluralRules == nullptr ||
    395            *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    396 }
    397 
    398 bool
    399 PluralFormat::operator!=(const Format& other) const {
    400    return  !operator==(other);
    401 }
    402 
    403 void
    404 PluralFormat::parseObject(const UnicodeString& /*source*/,
    405                        Formattable& /*result*/,
    406                        ParsePosition& pos) const
    407 {
    408    // Parsing not supported.
    409    pos.setErrorIndex(pos.getIndex());
    410 }
    411 
    412 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    413                                     const PluralSelector& selector, void *context,
    414                                     double number, UErrorCode& ec) {
    415    if (U_FAILURE(ec)) {
    416        return 0;
    417    }
    418    int32_t count=pattern.countParts();
    419    double offset;
    420    const MessagePattern::Part* part=&pattern.getPart(partIndex);
    421    if (MessagePattern::Part::hasNumericValue(part->getType())) {
    422        offset=pattern.getNumericValue(*part);
    423        ++partIndex;
    424    } else {
    425        offset=0;
    426    }
    427    // The keyword is empty until we need to match against a non-explicit, not-"other" value.
    428    // Then we get the keyword from the selector.
    429    // (In other words, we never call the selector if we match against an explicit value,
    430    // or if the only non-explicit keyword is "other".)
    431    UnicodeString keyword;
    432    UnicodeString other(false, OTHER_STRING, 5);
    433    // When we find a match, we set msgStart>0 and also set this boolean to true
    434    // to avoid matching the keyword again (duplicates are allowed)
    435    // while we continue to look for an explicit-value match.
    436    UBool haveKeywordMatch=false;
    437    // msgStart is 0 until we find any appropriate sub-message.
    438    // We remember the first "other" sub-message if we have not seen any
    439    // appropriate sub-message before.
    440    // We remember the first matching-keyword sub-message if we have not seen
    441    // one of those before.
    442    // (The parser allows [does not check for] duplicate keywords.
    443    // We just have to make sure to take the first one.)
    444    // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    445    // at the first keyword match.
    446    // We keep going until we find an explicit-value match or reach the end of the plural style.
    447    int32_t msgStart=0;
    448    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    449    // until ARG_LIMIT or end of plural-only pattern.
    450    do {
    451        part=&pattern.getPart(partIndex++);
    452        const UMessagePatternPartType type = part->getType();
    453        if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    454            break;
    455        }
    456        U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    457        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    458        if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    459            // explicit value like "=2"
    460            part=&pattern.getPart(partIndex++);
    461            if(number==pattern.getNumericValue(*part)) {
    462                // matches explicit value
    463                return partIndex;
    464            }
    465        } else if(!haveKeywordMatch) {
    466            // plural keyword like "few" or "other"
    467            // Compare "other" first and call the selector if this is not "other".
    468            if(pattern.partSubstringMatches(*part, other)) {
    469                if(msgStart==0) {
    470                    msgStart=partIndex;
    471                    if(0 == keyword.compare(other)) {
    472                        // This is the first "other" sub-message,
    473                        // and the selected keyword is also "other".
    474                        // Do not match "other" again.
    475                        haveKeywordMatch=true;
    476                    }
    477                }
    478            } else {
    479                if(keyword.isEmpty()) {
    480                    keyword=selector.select(context, number-offset, ec);
    481                    if(msgStart!=0 && (0 == keyword.compare(other))) {
    482                        // We have already seen an "other" sub-message.
    483                        // Do not match "other" again.
    484                        haveKeywordMatch=true;
    485                        // Skip keyword matching but do getLimitPartIndex().
    486                    }
    487                }
    488                if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    489                    // keyword matches
    490                    msgStart=partIndex;
    491                    // Do not match this keyword again.
    492                    haveKeywordMatch=true;
    493                }
    494            }
    495        }
    496        partIndex=pattern.getLimitPartIndex(partIndex);
    497    } while(++partIndex<count);
    498    return msgStart;
    499 }
    500 
    501 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
    502    // If no pattern was applied, return null.
    503    if (msgPattern.countParts() == 0) {
    504        pos.setBeginIndex(-1);
    505        pos.setEndIndex(-1);
    506        return;
    507    }
    508    int partIndex = 0;
    509    int currMatchIndex;
    510    int count=msgPattern.countParts();
    511    int startingAt = pos.getBeginIndex();
    512    if (startingAt < 0) {
    513        startingAt = 0;
    514    }
    515 
    516    // The keyword is null until we need to match against a non-explicit, not-"other" value.
    517    // Then we get the keyword from the selector.
    518    // (In other words, we never call the selector if we match against an explicit value,
    519    // or if the only non-explicit keyword is "other".)
    520    UnicodeString keyword;
    521    UnicodeString matchedWord;
    522    const UnicodeString& pattern = msgPattern.getPatternString();
    523    int matchedIndex = -1;
    524    // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
    525    // until the end of the plural-only pattern.
    526    while (partIndex < count) {
    527        const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
    528        if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
    529            // Bad format
    530            continue;
    531        }
    532 
    533        const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
    534        if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
    535            // Bad format
    536            continue;
    537        }
    538 
    539        const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
    540        if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
    541            // Bad format
    542            continue;
    543        }
    544 
    545        UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    546        if (rbnfLenientScanner != nullptr) {
    547            // Check if non-lenient rule finds the text before call lenient parsing
    548            int32_t tempIndex = source.indexOf(currArg, startingAt);
    549            if (tempIndex >= 0) {
    550                currMatchIndex = tempIndex;
    551            } else {
    552                // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
    553                int32_t length = -1;
    554                currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
    555            }
    556        }
    557        else {
    558            currMatchIndex = source.indexOf(currArg, startingAt);
    559        }
    560        if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
    561            matchedIndex = currMatchIndex;
    562            matchedWord = currArg;
    563            keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    564        }
    565    }
    566    if (matchedIndex >= 0) {
    567        pos.setBeginIndex(matchedIndex);
    568        pos.setEndIndex(matchedIndex + matchedWord.length());
    569        result.setString(keyword);
    570        return;
    571    }
    572 
    573    // Not found!
    574    pos.setBeginIndex(-1);
    575    pos.setEndIndex(-1);
    576 }
    577 
    578 PluralFormat::PluralSelector::~PluralSelector() {}
    579 
    580 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    581    delete pluralRules;
    582 }
    583 
    584 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
    585                                                          UErrorCode& /*ec*/) const {
    586    (void)number;  // unused except in the assertion
    587    IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
    588    return pluralRules->select(*dec);
    589 }
    590 
    591 void PluralFormat::PluralSelectorAdapter::reset() {
    592    delete pluralRules;
    593    pluralRules = nullptr;
    594 }
    595 
    596 
    597 U_NAMESPACE_END
    598 
    599 
    600 #endif /* #if !UCONFIG_NO_FORMATTING */
    601 
    602 //eof