numparse_currency.cpp (7206B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 // Allow implicit conversion from char16_t* to UnicodeString for this file: 9 // Helpful in toString methods and elsewhere. 10 #define UNISTR_FROM_STRING_EXPLICIT 11 12 #include "numparse_types.h" 13 #include "numparse_currency.h" 14 #include "ucurrimp.h" 15 #include "unicode/errorcode.h" 16 #include "numparse_utils.h" 17 #include "string_segment.h" 18 19 using namespace icu; 20 using namespace icu::numparse; 21 using namespace icu::numparse::impl; 22 23 24 CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, 25 parse_flags_t parseFlags, UErrorCode& status) 26 : fCurrency1(currencySymbols.getCurrencySymbol(status)), 27 fCurrency2(currencySymbols.getIntlCurrencySymbol(status)), 28 fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)), 29 afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)), 30 beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)), 31 fLocaleName(dfs.getLocale().getName(), -1, status) { 32 utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode()); 33 34 // Pre-load the long names for the current locale and currency 35 // if we are parsing without the full currency data. 36 if (!fUseFullCurrencyData) { 37 for (int32_t i=0; i<StandardPlural::COUNT; i++) { 38 auto plural = static_cast<StandardPlural::Form>(i); 39 fLocalLongNames[i] = currencySymbols.getPluralName(plural, status); 40 } 41 } 42 43 // TODO: Figure out how to make this faster and re-enable. 44 // Computing the "lead code points" set for fastpathing is too slow to use in production. 45 // See https://unicode-org.atlassian.net/browse/ICU-13584 46 // // Compute the full set of characters that could be the first in a currency to allow for 47 // // efficient smoke test. 48 // fLeadCodePoints.add(fCurrency1.char32At(0)); 49 // fLeadCodePoints.add(fCurrency2.char32At(0)); 50 // fLeadCodePoints.add(beforeSuffixInsert.char32At(0)); 51 // uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status); 52 // // Always apply case mapping closure for currencies 53 // fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS); 54 // fLeadCodePoints.freeze(); 55 } 56 57 bool 58 CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { 59 if (result.currencyCode[0] != 0) { 60 return false; 61 } 62 63 // Try to match a currency spacing separator. 64 int32_t initialOffset = segment.getOffset(); 65 bool maybeMore = false; 66 if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { 67 int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); 68 if (overlap == beforeSuffixInsert.length()) { 69 segment.adjustOffset(overlap); 70 // Note: let currency spacing be a weak match. Don't update chars consumed. 71 } 72 maybeMore = maybeMore || overlap == segment.length(); 73 } 74 75 // Match the currency string, and reset if we didn't find one. 76 maybeMore = maybeMore || matchCurrency(segment, result, status); 77 if (result.currencyCode[0] == 0) { 78 segment.setOffset(initialOffset); 79 return maybeMore; 80 } 81 82 // Try to match a currency spacing separator. 83 if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { 84 int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); 85 if (overlap == afterPrefixInsert.length()) { 86 segment.adjustOffset(overlap); 87 // Note: let currency spacing be a weak match. Don't update chars consumed. 88 } 89 maybeMore = maybeMore || overlap == segment.length(); 90 } 91 92 return maybeMore; 93 } 94 95 bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result, 96 UErrorCode& status) const { 97 bool maybeMore = false; 98 99 int32_t overlap1; 100 if (!fCurrency1.isEmpty()) { 101 overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); 102 } else { 103 overlap1 = -1; 104 } 105 maybeMore = maybeMore || overlap1 == segment.length(); 106 if (overlap1 == fCurrency1.length()) { 107 utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); 108 segment.adjustOffset(overlap1); 109 result.setCharsConsumed(segment); 110 return maybeMore; 111 } 112 113 int32_t overlap2; 114 if (!fCurrency2.isEmpty()) { 115 // ISO codes should be accepted case-insensitive. 116 // https://unicode-org.atlassian.net/browse/ICU-13696 117 overlap2 = segment.getCommonPrefixLength(fCurrency2); 118 } else { 119 overlap2 = -1; 120 } 121 maybeMore = maybeMore || overlap2 == segment.length(); 122 if (overlap2 == fCurrency2.length()) { 123 utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); 124 segment.adjustOffset(overlap2); 125 result.setCharsConsumed(segment); 126 return maybeMore; 127 } 128 129 if (fUseFullCurrencyData) { 130 // Use the full currency data. 131 // NOTE: This call site should be improved with #13584. 132 const UnicodeString segmentString = segment.toTempUnicodeString(); 133 134 // Try to parse the currency 135 ParsePosition ppos(0); 136 int32_t partialMatchLen = 0; 137 uprv_parseCurrency( 138 fLocaleName.data(), 139 segmentString, 140 ppos, 141 UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME 142 &partialMatchLen, 143 result.currencyCode, 144 status); 145 maybeMore = maybeMore || partialMatchLen == segment.length(); 146 147 if (U_SUCCESS(status) && ppos.getIndex() != 0) { 148 // Complete match. 149 // NOTE: The currency code should already be saved in the ParsedNumber. 150 segment.adjustOffset(ppos.getIndex()); 151 result.setCharsConsumed(segment); 152 return maybeMore; 153 } 154 155 } else { 156 // Use the locale long names. 157 int32_t longestFullMatch = 0; 158 for (int32_t i=0; i<StandardPlural::COUNT; i++) { 159 const UnicodeString& name = fLocalLongNames[i]; 160 int32_t overlap = segment.getCommonPrefixLength(name); 161 if (overlap == name.length() && name.length() > longestFullMatch) { 162 longestFullMatch = name.length(); 163 } 164 maybeMore = maybeMore || overlap > 0; 165 } 166 if (longestFullMatch > 0) { 167 utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); 168 segment.adjustOffset(longestFullMatch); 169 result.setCharsConsumed(segment); 170 return maybeMore; 171 } 172 } 173 174 // No match found. 175 return maybeMore; 176 } 177 178 bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { 179 // TODO: See constructor 180 return true; 181 //return segment.startsWith(fLeadCodePoints); 182 } 183 184 UnicodeString CombinedCurrencyMatcher::toString() const { 185 return u"<CombinedCurrencyMatcher>"; 186 } 187 188 189 #endif /* #if !UCONFIG_NO_FORMATTING */