tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

numparse_affixes.h (7029B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 #ifndef __NUMPARSE_AFFIXES_H__
      8 #define __NUMPARSE_AFFIXES_H__
      9 
     10 #include "cmemory.h"
     11 
     12 #include "numparse_types.h"
     13 #include "numparse_symbols.h"
     14 #include "numparse_currency.h"
     15 #include "number_affixutils.h"
     16 #include "number_currencysymbols.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     20 namespace numparse::impl {
     21 
     22 // Forward declarations of the implementation classes that are named in friend declarations below
     23 class AffixPatternMatcherBuilder;
     24 class AffixPatternMatcher;
     25
        // Make these three types from ::icu::number::impl usable without qualification inside
        // numparse::impl; they appear in the class declarations that follow.
     26 using ::icu::number::impl::AffixPatternProvider;
     27 using ::icu::number::impl::TokenConsumer;
     28 using ::icu::number::impl::CurrencySymbols;
     29 
     30 
     31 class CodePointMatcher : public NumberParseMatcher, public UMemory {
           // A NumberParseMatcher parameterized by a single code point (UChar32).
           // All member functions are only declared here; their bodies are not part of this header.
     32  public:
     33    CodePointMatcher() = default;  // WARNING: Leaves the object in an unusable state
     34
           // NOTE(review): presumably records cp in fCp -- confirm against the definition.
           // Not marked explicit, so a UChar32 converts implicitly to a CodePointMatcher.
     35    CodePointMatcher(UChar32 cp);
     36
           // Overrides of the NumberParseMatcher interface.
     37    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
     38
     39    bool smokeTest(const StringSegment& segment) const override;
     40
     41    UnicodeString toString() const override;
     42
     43  private:
           // No default member initializer; see the WARNING on the defaulted constructor.
     44    UChar32 fCp;
     45 };
     46 
     47 
     48 struct AffixTokenMatcherSetupData {
           // Bundle of inputs handed (by pointer) to the AffixTokenMatcherWarehouse constructor.
           // Every member except parseFlags is a reference, so this struct copies nothing: the
           // referenced objects must remain alive for as long as the struct is being read.
     49    const CurrencySymbols& currencySymbols;
     50    const DecimalFormatSymbols& dfs;
           // The only non-const reference in the bundle.
     51    IgnorablesMatcher& ignorables;
     52    const Locale& locale;
           // Held by value, unlike the members above.
     53    parse_flags_t parseFlags;
     54 };
     55 
     56 
     57 /**
     58 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
     59 *
     60 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
     61 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
     62 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
     63 * the warehouse.
     64 *
     65 * @author sffc
     66 */
     67 // Exported as U_I18N_API_CLASS for tests
     68 class U_I18N_API_CLASS AffixTokenMatcherWarehouse : public UMemory {
     69  public:
     70    AffixTokenMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state
     71
           // Receives the setup data by pointer; see the NOTE on fSetupData regarding its lifetime.
     72    U_I18N_API AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);
     73
           // Accessors for the individual token matchers. Each returns a reference, so the result
           // is only valid while this warehouse is alive (see the class comment).
           // NOTE(review): presumably backed by the same-named f* fields below -- confirm.
     74    NumberParseMatcher& minusSign();
     75
     76    NumberParseMatcher& plusSign();
     77
     78    NumberParseMatcher& approximatelySign();
     79
     80    NumberParseMatcher& percent();
     81
     82    NumberParseMatcher& permille();
     83
           // Unlike the accessors above, this one takes a UErrorCode and is individually exported.
     84    U_I18N_API NumberParseMatcher& currency(UErrorCode& status);
     85
     86    IgnorablesMatcher& ignorables();
     87
           // Returns a pointer to a matcher for cp.
           // NOTE(review): presumably allocated from fCodePoints, with failure reported through
           // status -- confirm against the definition.
     88    NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
     89
     90    bool hasEmptyCurrencySymbol() const;
     91
     92  private:
     93    // NOTE: The following field may be unsafe to access after construction is done!
           // It is a raw pointer to caller-supplied data and has no default member initializer.
     94    const AffixTokenMatcherSetupData* fSetupData;
     95
     96    // NOTE: These are default-constructed and should not be used until initialized.
     97    MinusSignMatcher fMinusSign;
     98    PlusSignMatcher fPlusSign;
     99    ApproximatelySignMatcher fApproximatelySign;
    100    PercentMatcher fPercent;
    101    PermilleMatcher fPermille;
    102    CombinedCurrencyMatcher fCurrency;
    103
    104    // Use a child class for code point matchers, since it requires non-default operators.
           // Storage for the CodePointMatcher objects held by this warehouse.
    105    MemoryPool<CodePointMatcher> fCodePoints;
    106
           // Both classes below get access to the private members above.
    107    friend class AffixPatternMatcherBuilder;
    108    friend class AffixPatternMatcher;
    109 };
    110 
    111 
    112 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection {
           // Builder that produces an AffixPatternMatcher. It is fed tokens through the
           // TokenConsumer interface (consumeToken) and matchers through the
           // MutableMatcherCollection interface (addMatcher), then finished with build().
    113  public:
           // ignorables is passed as a pointer while the other two arguments are references.
           // NOTE(review): presumably ignorables may be null -- confirm against the definition.
    114    AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
    115                               IgnorablesMatcher* ignorables);
    116
    117    void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
    118
    119    /** NOTE: You can build only once! */
    120    AffixPatternMatcher build(UErrorCode& status);
    121
    122  private:
           // Matchers collected so far and how many of them there are.
    123    ArraySeriesMatcher::MatcherArray fMatchers;
    124    int32_t fMatchersLen;
           // NOTE(review): by its name, holds either a token type or a code point for the most
           // recently consumed token -- confirm the encoding against consumeToken().
    125    int32_t fLastTypeOrCp;
    126
           // fPattern and fWarehouse are reference members, so the builder does not copy them.
           // NOTE(review): presumably bound to the constructor arguments, which must then outlive
           // the builder -- confirm.
    127    const UnicodeString& fPattern;
    128    AffixTokenMatcherWarehouse& fWarehouse;
    129    IgnorablesMatcher* fIgnorables;
    130
           // Overrides the MutableMatcherCollection hook; private here, so it is reachable only
           // through the base-class interface.
    131    void addMatcher(NumberParseMatcher& matcher) override;
    132 };
    133 
    134 
    135 // Exported as U_I18N_API_CLASS for tests
    136 class U_I18N_API_CLASS AffixPatternMatcher : public ArraySeriesMatcher {
           // An ArraySeriesMatcher that additionally carries its pattern (fPattern).
           // Apart from the defaulted constructor, the only constructor is private, so usable
           // instances come from fromAffixPattern() or from the befriended builder.
    137  public:
    138    AffixPatternMatcher() = default;  // WARNING: Leaves the object in an unusable state
    139
           // Static factory. Reports through two out-parameters: *success and status.
           // NOTE(review): the exact meaning of *success versus a failing status is not visible
           // in this header -- confirm against the definition.
    140    U_I18N_API static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
    141                                                           AffixTokenMatcherWarehouse& warehouse,
    142                                                           parse_flags_t parseFlags, bool* success,
    143                                                           UErrorCode& status);
    144
           // Returns the pattern by value.
    145    UnicodeString getPattern() const;
    146
    147    bool operator==(const AffixPatternMatcher& other) const;
    148
    149  private:
    150    CompactUnicodeString<4> fPattern;
    151
           // Takes the matcher array by non-const reference.
           // NOTE(review): presumably takes over its contents -- confirm against the definition.
    152    AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
    153                        UErrorCode& status);
    154
           // Gives the builder access to the private constructor above.
    155    friend class AffixPatternMatcherBuilder;
    156 };
    157 
    158 
    159 class AffixMatcher : public NumberParseMatcher, public UMemory {
           // A NumberParseMatcher that pairs a prefix AffixPatternMatcher with a suffix
           // AffixPatternMatcher and a set of result flags.
    160  public:
    161    AffixMatcher() = default;  // WARNING: Leaves the object in an unusable state
    162
           // prefix and suffix are raw pointers.
           // NOTE(review): presumably non-owning and possibly null when an affix is absent;
           // AffixMatcherWarehouse holds an AffixPatternMatcher array -- confirm.
    163    AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
    164
           // Overrides of the NumberParseMatcher interface.
    165    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
    166
    167    void postProcess(ParsedNumber& result) const override;
    168
    169    bool smokeTest(const StringSegment& segment) const override;
    170
           // Three-way style comparison against rhs, returned as an int8_t.
           // NOTE(review): the ordering criteria are defined elsewhere -- confirm before relying
           // on the sign convention.
    171    int8_t compareTo(const AffixMatcher& rhs) const;
    172
    173    UnicodeString toString() const override;
    174
    175  private:
           // None of these has a default member initializer; see the WARNING on the defaulted
           // constructor.
    176    AffixPatternMatcher* fPrefix;
    177    AffixPatternMatcher* fSuffix;
    178    result_flags_t fFlags;
    179 };
    180 
    181 
    182 /**
    183 * A C++-only class to retain ownership of the AffixMatchers needed for parsing.
        *
        * The matchers live in fixed-size member arrays, so the objects are stored inside the
        * warehouse itself. Note that, unlike the other classes in this header, this class does
        * not derive from UMemory.
    184 */
    185 class AffixMatcherWarehouse {
    186  public:
    187    AffixMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state
    188
           // Receives the token warehouse by pointer (see fTokenWarehouse).
    189    AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);
    190
           // Creates affix matchers for patternInfo and hands them to output; errors are reported
           // through status.
           // NOTE(review): presumably the matchers added to output are the elements of
           // fAffixMatchers, so output must not outlive this warehouse -- confirm.
    191    void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
    192                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
    193                             UErrorCode& status);
    194
    195  private:
    196    // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
    197    // and doubled since there may be an empty currency symbol
    198    AffixMatcher fAffixMatchers[18];
    199    // 12 is the limit: positive, zero, and negative, a prefix and a suffix for each (6),
    200    // and doubled since there may be an empty currency symbol
    201    AffixPatternMatcher fAffixPatternMatchers[12];
    202    // Pointer to the warehouse for tokens used by the AffixPatternMatchers (no default initializer)
    203    AffixTokenMatcherWarehouse* fTokenWarehouse;
    204
    205    friend class AffixMatcher;
    206
           // Private static helper taking the same inputs as createAffixMatchers() minus output.
           // NOTE(review): presumably decides whether any affix matchers are needed at all --
           // confirm against the definition.
    207    static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
    208                              parse_flags_t parseFlags, UErrorCode& status);
    209 };
    210 
    211 } // namespace numparse::impl
    212 
    213 U_NAMESPACE_END
    214 
    215 #endif //__NUMPARSE_AFFIXES_H__
    216 #endif /* #if !UCONFIG_NO_FORMATTING */