static_unicode_sets.h (3856B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // This file contains utilities to deal with static-allocated UnicodeSets. 5 // 6 // Common use case: you write a "private static final" UnicodeSet in Java, and 7 // want something similarly easy in C++. Originally written for number 8 // parsing, but this header can be used for other applications. 9 // 10 // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)` 11 // 12 // This file is in common instead of i18n because it is needed by ucurr.cpp. 13 // 14 // Author: sffc 15 16 #include "unicode/utypes.h" 17 18 #if !UCONFIG_NO_FORMATTING 19 #ifndef __STATIC_UNICODE_SETS_H__ 20 #define __STATIC_UNICODE_SETS_H__ 21 22 #include "unicode/uniset.h" 23 #include "unicode/unistr.h" 24 25 U_NAMESPACE_BEGIN 26 namespace unisets { 27 28 enum Key { 29 // NONE is used to indicate null in chooseFrom(). 30 // EMPTY is used to get an empty UnicodeSet. 31 NONE = -1, 32 EMPTY = 0, 33 34 // Ignorables 35 DEFAULT_IGNORABLES, 36 STRICT_IGNORABLES, 37 38 // Separators 39 // Notes: 40 // - COMMA is a superset of STRICT_COMMA 41 // - PERIOD is a superset of SCRICT_PERIOD 42 // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS 43 // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS 44 COMMA, 45 PERIOD, 46 STRICT_COMMA, 47 STRICT_PERIOD, 48 APOSTROPHE_SIGN, 49 OTHER_GROUPING_SEPARATORS, 50 ALL_SEPARATORS, 51 STRICT_ALL_SEPARATORS, 52 53 // Symbols 54 MINUS_SIGN, 55 PLUS_SIGN, 56 PERCENT_SIGN, 57 PERMILLE_SIGN, 58 INFINITY_SIGN, 59 APPROXIMATELY_SIGN, 60 61 // Currency Symbols 62 DOLLAR_SIGN, 63 POUND_SIGN, 64 RUPEE_SIGN, 65 YEN_SIGN, 66 WON_SIGN, 67 68 // Other 69 DIGITS, 70 71 // Combined Separators with Digits (for lead code points) 72 DIGITS_OR_ALL_SEPARATORS, 73 DIGITS_OR_STRICT_ALL_SEPARATORS, 74 75 // The number of elements in the enum. 76 UNISETS_KEY_COUNT 77 }; 78 79 /** 80 * Gets the static-allocated UnicodeSet according to the provided key. The 81 * pointer will be deleted during u_cleanup(); the caller should NOT delete it. 82 * 83 * Exported as U_COMMON_API for ucurr.cpp 84 * 85 * This method is always safe and OK to chain: in the case of a memory or other 86 * error, it returns an empty set from static memory. 87 * 88 * Example: 89 * 90 * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); 91 * 92 * @param key The desired UnicodeSet according to the enum in this file. 93 * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but 94 * may be empty if an error occurred during data loading. 95 */ 96 U_COMMON_API const UnicodeSet* get(Key key); 97 98 /** 99 * Checks if the UnicodeSet given by key1 contains the given string. 100 * 101 * Exported as U_COMMON_API for numparse_decimal.cpp 102 * 103 * @param str The string to check. 104 * @param key1 The set to check. 105 * @return key1 if the set contains str, or NONE if not. 106 */ 107 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); 108 109 /** 110 * Checks if the UnicodeSet given by either key1 or key2 contains the string. 111 * 112 * Exported as U_COMMON_API for numparse_decimal.cpp 113 * 114 * @param str The string to check. 115 * @param key1 The first set to check. 116 * @param key2 The second set to check. 117 * @return key1 if that set contains str; key2 if that set contains str; or 118 * NONE if neither set contains str. 119 */ 120 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); 121 122 // TODO: Load these from data: ICU-20108 123 // Unused in C++: 124 // Key chooseCurrency(UnicodeString str); 125 // Used instead: 126 static const struct { 127 Key key; 128 UChar32 exemplar; 129 } kCurrencyEntries[] = { 130 {DOLLAR_SIGN, u'$'}, 131 {POUND_SIGN, u'£'}, 132 {RUPEE_SIGN, u'₹'}, 133 {YEN_SIGN, u'¥'}, 134 {WON_SIGN, u'₩'}, 135 }; 136 137 } // namespace unisets 138 U_NAMESPACE_END 139 140 #endif //__STATIC_UNICODE_SETS_H__ 141 #endif /* #if !UCONFIG_NO_FORMATTING */