numparse_affixes.h (7029B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_AFFIXES_H__ 8 #define __NUMPARSE_AFFIXES_H__ 9 10 #include "cmemory.h" 11 12 #include "numparse_types.h" 13 #include "numparse_symbols.h" 14 #include "numparse_currency.h" 15 #include "number_affixutils.h" 16 #include "number_currencysymbols.h" 17 18 U_NAMESPACE_BEGIN 19 20 namespace numparse::impl { 21 22 // Forward-declaration of implementation classes for friending 23 class AffixPatternMatcherBuilder; 24 class AffixPatternMatcher; 25 26 using ::icu::number::impl::AffixPatternProvider; 27 using ::icu::number::impl::TokenConsumer; 28 using ::icu::number::impl::CurrencySymbols; 29 30 31 class CodePointMatcher : public NumberParseMatcher, public UMemory { 32 public: 33 CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state 34 35 CodePointMatcher(UChar32 cp); 36 37 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 38 39 bool smokeTest(const StringSegment& segment) const override; 40 41 UnicodeString toString() const override; 42 43 private: 44 UChar32 fCp; 45 }; 46 47 48 struct AffixTokenMatcherSetupData { 49 const CurrencySymbols& currencySymbols; 50 const DecimalFormatSymbols& dfs; 51 IgnorablesMatcher& ignorables; 52 const Locale& locale; 53 parse_flags_t parseFlags; 54 }; 55 56 57 /** 58 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. 59 * 60 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a 61 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The 62 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from 63 * the warehouse. 64 * 65 * @author sffc 66 */ 67 // Exported as U_I18N_API_CLASS for tests 68 class U_I18N_API_CLASS AffixTokenMatcherWarehouse : public UMemory { 69 public: 70 AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 71 72 U_I18N_API AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); 73 74 NumberParseMatcher& minusSign(); 75 76 NumberParseMatcher& plusSign(); 77 78 NumberParseMatcher& approximatelySign(); 79 80 NumberParseMatcher& percent(); 81 82 NumberParseMatcher& permille(); 83 84 U_I18N_API NumberParseMatcher& currency(UErrorCode& status); 85 86 IgnorablesMatcher& ignorables(); 87 88 NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); 89 90 bool hasEmptyCurrencySymbol() const; 91 92 private: 93 // NOTE: The following field may be unsafe to access after construction is done! 94 const AffixTokenMatcherSetupData* fSetupData; 95 96 // NOTE: These are default-constructed and should not be used until initialized. 97 MinusSignMatcher fMinusSign; 98 PlusSignMatcher fPlusSign; 99 ApproximatelySignMatcher fApproximatelySign; 100 PercentMatcher fPercent; 101 PermilleMatcher fPermille; 102 CombinedCurrencyMatcher fCurrency; 103 104 // Use a child class for code point matchers, since it requires non-default operators. 105 MemoryPool<CodePointMatcher> fCodePoints; 106 107 friend class AffixPatternMatcherBuilder; 108 friend class AffixPatternMatcher; 109 }; 110 111 112 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { 113 public: 114 AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, 115 IgnorablesMatcher* ignorables); 116 117 void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; 118 119 /** NOTE: You can build only once! */ 120 AffixPatternMatcher build(UErrorCode& status); 121 122 private: 123 ArraySeriesMatcher::MatcherArray fMatchers; 124 int32_t fMatchersLen; 125 int32_t fLastTypeOrCp; 126 127 const UnicodeString& fPattern; 128 AffixTokenMatcherWarehouse& fWarehouse; 129 IgnorablesMatcher* fIgnorables; 130 131 void addMatcher(NumberParseMatcher& matcher) override; 132 }; 133 134 135 // Exported as U_I18N_API_CLASS for tests 136 class U_I18N_API_CLASS AffixPatternMatcher : public ArraySeriesMatcher { 137 public: 138 AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state 139 140 U_I18N_API static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, 141 AffixTokenMatcherWarehouse& warehouse, 142 parse_flags_t parseFlags, bool* success, 143 UErrorCode& status); 144 145 UnicodeString getPattern() const; 146 147 bool operator==(const AffixPatternMatcher& other) const; 148 149 private: 150 CompactUnicodeString<4> fPattern; 151 152 AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, 153 UErrorCode& status); 154 155 friend class AffixPatternMatcherBuilder; 156 }; 157 158 159 class AffixMatcher : public NumberParseMatcher, public UMemory { 160 public: 161 AffixMatcher() = default; // WARNING: Leaves the object in an unusable state 162 163 AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); 164 165 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 166 167 void postProcess(ParsedNumber& result) const override; 168 169 bool smokeTest(const StringSegment& segment) const override; 170 171 int8_t compareTo(const AffixMatcher& rhs) const; 172 173 UnicodeString toString() const override; 174 175 private: 176 AffixPatternMatcher* fPrefix; 177 AffixPatternMatcher* fSuffix; 178 result_flags_t fFlags; 179 }; 180 181 182 /** 183 * A C++-only class to retain ownership of the AffixMatchers needed for parsing. 184 */ 185 class AffixMatcherWarehouse { 186 public: 187 AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 188 189 AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); 190 191 void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, 192 const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, 193 UErrorCode& status); 194 195 private: 196 // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix, 197 // and doubled since there may be an empty currency symbol 198 AffixMatcher fAffixMatchers[18]; 199 // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each, 200 // and doubled since there may be an empty currency symbol 201 AffixPatternMatcher fAffixPatternMatchers[12]; 202 // Reference to the warehouse for tokens used by the AffixPatternMatchers 203 AffixTokenMatcherWarehouse* fTokenWarehouse; 204 205 friend class AffixMatcher; 206 207 static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, 208 parse_flags_t parseFlags, UErrorCode& status); 209 }; 210 211 } // namespace numparse::impl 212 213 U_NAMESPACE_END 214 215 #endif //__NUMPARSE_AFFIXES_H__ 216 #endif /* #if !UCONFIG_NO_FORMATTING */