tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

numparse_currency.cpp (7206B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
      9 // Helpful in toString methods and elsewhere.
     10 #define UNISTR_FROM_STRING_EXPLICIT
     11 
     12 #include "numparse_types.h"
     13 #include "numparse_currency.h"
     14 #include "ucurrimp.h"
     15 #include "unicode/errorcode.h"
     16 #include "numparse_utils.h"
     17 #include "string_segment.h"
     18 
     19 using namespace icu;
     20 using namespace icu::numparse;
     21 using namespace icu::numparse::impl;
     22 
     23 
     24 CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
     25                                                 parse_flags_t parseFlags, UErrorCode& status)
     26        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
     27          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
     28          fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
     29          afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
     30          beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
     31          fLocaleName(dfs.getLocale().getName(), -1, status) {
     32    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
     33 
     34    // Pre-load the long names for the current locale and currency
     35    // if we are parsing without the full currency data.
     36    if (!fUseFullCurrencyData) {
     37        for (int32_t i=0; i<StandardPlural::COUNT; i++) {
     38            auto plural = static_cast<StandardPlural::Form>(i);
     39            fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
     40        }
     41    }
     42 
     43    // TODO: Figure out how to make this faster and re-enable.
     44    // Computing the "lead code points" set for fastpathing is too slow to use in production.
     45    // See https://unicode-org.atlassian.net/browse/ICU-13584
     46 //    // Compute the full set of characters that could be the first in a currency to allow for
     47 //    // efficient smoke test.
     48 //    fLeadCodePoints.add(fCurrency1.char32At(0));
     49 //    fLeadCodePoints.add(fCurrency2.char32At(0));
     50 //    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
     51 //    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
     52 //    // Always apply case mapping closure for currencies
     53 //    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
     54 //    fLeadCodePoints.freeze();
     55 }
     56 
     57 bool
     58 CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
     59    if (result.currencyCode[0] != 0) {
     60        return false;
     61    }
     62 
     63    // Try to match a currency spacing separator.
     64    int32_t initialOffset = segment.getOffset();
     65    bool maybeMore = false;
     66    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
     67        int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
     68        if (overlap == beforeSuffixInsert.length()) {
     69            segment.adjustOffset(overlap);
     70            // Note: let currency spacing be a weak match. Don't update chars consumed.
     71        }
     72        maybeMore = maybeMore || overlap == segment.length();
     73    }
     74 
     75    // Match the currency string, and reset if we didn't find one.
     76    maybeMore = maybeMore || matchCurrency(segment, result, status);
     77    if (result.currencyCode[0] == 0) {
     78        segment.setOffset(initialOffset);
     79        return maybeMore;
     80    }
     81 
     82    // Try to match a currency spacing separator.
     83    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
     84        int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
     85        if (overlap == afterPrefixInsert.length()) {
     86            segment.adjustOffset(overlap);
     87            // Note: let currency spacing be a weak match. Don't update chars consumed.
     88        }
     89        maybeMore = maybeMore || overlap == segment.length();
     90    }
     91 
     92    return maybeMore;
     93 }
     94 
     95 bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
     96                                            UErrorCode& status) const {
     97    bool maybeMore = false;
     98 
     99    int32_t overlap1;
    100    if (!fCurrency1.isEmpty()) {
    101        overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
    102    } else {
    103        overlap1 = -1;
    104    }
    105    maybeMore = maybeMore || overlap1 == segment.length();
    106    if (overlap1 == fCurrency1.length()) {
    107        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
    108        segment.adjustOffset(overlap1);
    109        result.setCharsConsumed(segment);
    110        return maybeMore;
    111    }
    112 
    113    int32_t overlap2;
    114    if (!fCurrency2.isEmpty()) {
    115        // ISO codes should be accepted case-insensitive.
    116        // https://unicode-org.atlassian.net/browse/ICU-13696
    117        overlap2 = segment.getCommonPrefixLength(fCurrency2);
    118    } else {
    119        overlap2 = -1;
    120    }
    121    maybeMore = maybeMore || overlap2 == segment.length();
    122    if (overlap2 == fCurrency2.length()) {
    123        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
    124        segment.adjustOffset(overlap2);
    125        result.setCharsConsumed(segment);
    126        return maybeMore;
    127    }
    128 
    129    if (fUseFullCurrencyData) {
    130        // Use the full currency data.
    131        // NOTE: This call site should be improved with #13584.
    132        const UnicodeString segmentString = segment.toTempUnicodeString();
    133 
    134        // Try to parse the currency
    135        ParsePosition ppos(0);
    136        int32_t partialMatchLen = 0;
    137        uprv_parseCurrency(
    138                fLocaleName.data(),
    139                segmentString,
    140                ppos,
    141                UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
    142                &partialMatchLen,
    143                result.currencyCode,
    144                status);
    145        maybeMore = maybeMore || partialMatchLen == segment.length();
    146 
    147        if (U_SUCCESS(status) && ppos.getIndex() != 0) {
    148            // Complete match.
    149            // NOTE: The currency code should already be saved in the ParsedNumber.
    150            segment.adjustOffset(ppos.getIndex());
    151            result.setCharsConsumed(segment);
    152            return maybeMore;
    153        }
    154 
    155    } else {
    156        // Use the locale long names.
    157        int32_t longestFullMatch = 0;
    158        for (int32_t i=0; i<StandardPlural::COUNT; i++) {
    159            const UnicodeString& name = fLocalLongNames[i];
    160            int32_t overlap = segment.getCommonPrefixLength(name);
    161            if (overlap == name.length() && name.length() > longestFullMatch) {
    162                longestFullMatch = name.length();
    163            }
    164            maybeMore = maybeMore || overlap > 0;
    165        }
    166        if (longestFullMatch > 0) {
    167            utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
    168            segment.adjustOffset(longestFullMatch);
    169            result.setCharsConsumed(segment);
    170            return maybeMore;
    171        }
    172    }
    173 
    174    // No match found.
    175    return maybeMore;
    176 }
    177 
    178 bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {
    179    // TODO: See constructor
    180    return true;
    181    //return segment.startsWith(fLeadCodePoints);
    182 }
    183 
    184 UnicodeString CombinedCurrencyMatcher::toString() const {
    185    return u"<CombinedCurrencyMatcher>";
    186 }
    187 
    188 
    189 #endif /* #if !UCONFIG_NO_FORMATTING */