tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

static_unicode_sets.h (3856B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // This file contains utilities to deal with static-allocated UnicodeSets.
      5 //
      6 // Common use case: you write a "private static final" UnicodeSet in Java, and
      7 // want something similarly easy in C++.  Originally written for number
      8 // parsing, but this header can be used for other applications.
      9 //
     10 // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
     11 //
     12 // This file is in common instead of i18n because it is needed by ucurr.cpp.
     13 //
     14 // Author: sffc
     15 
     16 #include "unicode/utypes.h"
     17 
     18 #if !UCONFIG_NO_FORMATTING
     19 #ifndef __STATIC_UNICODE_SETS_H__
     20 #define __STATIC_UNICODE_SETS_H__
     21 
     22 #include "unicode/uniset.h"
     23 #include "unicode/unistr.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 namespace unisets {
     27 
     28 enum Key {
     29    // Keys for unisets::get(). NONE is the "no match" value returned by chooseFrom().
     30    // EMPTY is used to get an empty UnicodeSet.
     31    NONE = -1,
     32    EMPTY = 0,
     33 
     34    // Ignorables
     35    DEFAULT_IGNORABLES,
     36    STRICT_IGNORABLES,
     37 
     38    // Separators
     39    // Notes:
     40    // - COMMA is a superset of STRICT_COMMA
     41    // - PERIOD is a superset of STRICT_PERIOD
     42    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
     43    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GROUPING_SEPARATORS
     44    COMMA,
     45    PERIOD,
     46    STRICT_COMMA,
     47    STRICT_PERIOD,
     48    APOSTROPHE_SIGN,
     49    OTHER_GROUPING_SEPARATORS,
     50    ALL_SEPARATORS,
     51    STRICT_ALL_SEPARATORS,
     52 
     53    // Symbols
     54    MINUS_SIGN,
     55    PLUS_SIGN,
     56    PERCENT_SIGN,
     57    PERMILLE_SIGN,
     58    INFINITY_SIGN,
     59    APPROXIMATELY_SIGN,
     60 
     61    // Currency Symbols
     62    DOLLAR_SIGN,
     63    POUND_SIGN,
     64    RUPEE_SIGN,
     65    YEN_SIGN,
     66    WON_SIGN,
     67 
     68    // Other
     69    DIGITS,
     70 
     71    // Combined Separators with Digits (for lead code points)
     72    DIGITS_OR_ALL_SEPARATORS,
     73    DIGITS_OR_STRICT_ALL_SEPARATORS,
     74 
     75    // The number of keys, not counting NONE (which is -1). Keep this enumerator last.
     76    UNISETS_KEY_COUNT
     77 };
     78 
     79 /**
     80 * Gets the static-allocated UnicodeSet according to the provided key. The
     81 * pointer will be deleted during u_cleanup(); the caller must NOT delete it.
     82 *
     83 * Exported as U_COMMON_API because ucurr.cpp needs it.
     84 *
     85 * This method is always safe and OK to chain: in the case of a memory or other
     86 * error, it returns an empty set from static memory.
     87 *
     88 * Example:
     89 *
     90 *     UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
     91 *
     92 * @param key The desired UnicodeSet, named by a Key enumerator (EMPTY yields the empty set).
     93 * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
     94 *         may be empty if an error occurred during data loading.
     95 */
     96 U_COMMON_API const UnicodeSet* get(Key key);
     97 
     98 /**
     99 * Checks if the UnicodeSet given by key1 contains the given string.
    100 *
    101 * Exported as U_COMMON_API for numparse_decimal.cpp
    102 *
    103 * @param str The string to check (passed by value).
    104 * @param key1 Key of the set to check.
    105 * @return key1 if the set contains str, or NONE (-1) if not.
    106 */
    107 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
    108 
    109 /**
    110 * Checks if the UnicodeSet given by either key1 or key2 contains the string.
    111 *
    112 * Exported as U_COMMON_API for numparse_decimal.cpp
    113 *
    114 * @param str The string to check (passed by value).
    115 * @param key1 Key of the first set to check.
    116 * @param key2 Key of the second set to check.
    117 * @return key1 if that set contains str; key2 if that set contains str; or NONE if neither
    118 *         does. NOTE(review): key1 presumably wins when both sets match — confirm in the .cpp.
    119 */
    120 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
    121 
    122 // TODO: Load these from data: ICU-20108
    123 // Unused in C++:
    124 // Key chooseCurrency(UnicodeString str);
    125 // Used instead: a table pairing each currency-symbol Key with one sample code point.
    126 static const struct {
    127    Key key;            // one of the "Currency Symbols" keys of enum Key
    128    UChar32 exemplar;   // sample code point for that key (presumably a member of its set — confirm)
    129 } kCurrencyEntries[] = {
    130    {DOLLAR_SIGN, u'$'},  // U+0024 DOLLAR SIGN
    131    {POUND_SIGN, u'£'},   // U+00A3 POUND SIGN
    132    {RUPEE_SIGN, u'₹'},   // U+20B9 INDIAN RUPEE SIGN
    133    {YEN_SIGN, u'¥'},     // U+00A5 YEN SIGN
    134    {WON_SIGN, u'₩'},     // U+20A9 WON SIGN
    135 };
    136 
    137 } // namespace unisets
    138 U_NAMESPACE_END
    139 
    140 #endif //__STATIC_UNICODE_SETS_H__
    141 #endif /* #if !UCONFIG_NO_FORMATTING */