tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ulocimp.h (14359B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2004-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #ifndef ULOCIMP_H
     11 #define ULOCIMP_H
     12 
     13 #include <cstddef>
     14 #include <optional>
     15 #include <string_view>
     16 
     17 #include "unicode/bytestream.h"
     18 #include "unicode/uloc.h"
     19 
     20 #include "charstr.h"
     21 
     22 /**
     23 * Create an iterator over the specified keyword list.
     24 * @param keywordList double-null terminated list. Will be copied.
     25 * @param keywordListSize size in bytes of keywordList
     26 * @param status error code
     27 * @return enumeration (owned by caller) of the keyword list.
     28 * @internal ICU 3.0
     29 */
     30 U_CAPI UEnumeration* U_EXPORT2
     31 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
     32 
     33 /**
     34 * Look up a resource bundle table item with fallback on the table level.
     35 * This is accessible so it can be called by C++ code.
        *
        * NOTE(review): the parameters are not documented in this header.
        * Presumably `path` and `locale` select the bundle,
        * `tableKey`/`subTableKey`/`itemKey` select the item, `*pLength` receives
        * the length of the returned string, and failures are reported through
        * `*pErrorCode` -- confirm against the implementation.
     36 */
     37 U_CAPI const UChar * U_EXPORT2
     38 uloc_getTableStringWithFallback(
     39    const char *path,
     40    const char *locale,
     41    const char *tableKey,
     42    const char *subTableKey,
     43    const char *itemKey,
     44    int32_t *pLength,
     45    UErrorCode *pErrorCode);
     46 
     47 namespace {
     48 /**
        * Returns true if `a` is an ID separator, false otherwise.
        * The two characters accepted as separators are '_' and '-'.
        *
        * The function lives in an unnamed namespace, so every translation unit
        * that includes this header gets its own internal-linkage copy.
        */
     49 inline bool _isIDSeparator(char a) { return a == '_' || a == '-'; }
     50 }  // namespace
     51 
     /**
      * Takes a country ID string and returns a country ID string.
      * NOTE(review): the name and the `oldID` parameter suggest this maps an
      * obsolete country code to its current replacement. What is returned when
      * no replacement exists, and who owns the returned pointer, is not visible
      * in this header -- confirm against the implementation.
      */
     52 U_CFUNC const char* 
     53 uloc_getCurrentCountryID(const char* oldID);
     54 
     /**
      * Language-ID counterpart of uloc_getCurrentCountryID(): same signature,
      * taking and returning a language ID string.
      * NOTE(review): presumably maps an obsolete language code to its current
      * replacement -- confirm against the implementation.
      */
     55 U_CFUNC const char* 
     56 uloc_getCurrentLanguageID(const char* oldID);
     57 
     /**
      * Keyword key/type conversion helpers, "WithFallback" variants.
      *
      * The key functions take one std::string_view (`keyword`); the type
      * functions take two (`keyword` and `value`). All four return
      * std::optional<std::string_view>.
      *
      * NOTE(review): judging by the names, these convert a locale keyword key
      * (or the value belonging to a key) between its BCP 47 and legacy forms.
      * What std::nullopt signals, what the "fallback" consists of, and how long
      * the returned view stays valid are not visible in this header -- confirm
      * against the implementation.
      */
     58 U_COMMON_API std::optional<std::string_view>
     59 ulocimp_toBcpKeyWithFallback(std::string_view keyword);
     60 
     61 U_COMMON_API std::optional<std::string_view>
     62 ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
     63 
     64 U_COMMON_API std::optional<std::string_view>
     65 ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
     66 
     67 U_COMMON_API std::optional<std::string_view>
     68 ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
     69 
     /**
      * Two overloads taking the same inputs: the first returns its result as an
      * icu::CharString, the second writes it to `sink`.
      *
      * NOTE(review): presumably this extracts the keywords of `localeID`,
      * including their values when `valuesToo` is true. The role of `prev` is
      * not visible in this header -- confirm against the implementation.
      *
      * @param status in/out error code.
      */
     70 U_COMMON_API icu::CharString
     71 ulocimp_getKeywords(std::string_view localeID,
     72                    char prev,
     73                    bool valuesToo,
     74                    UErrorCode& status);
     75 
     76 U_COMMON_API void
     77 ulocimp_getKeywords(std::string_view localeID,
     78                    char prev,
     79                    icu::ByteSink& sink,
     80                    bool valuesToo,
     81                    UErrorCode& status);
     82 
     /**
      * Two overloads taking the same locale ID: the first returns its result as
      * an icu::CharString, the second writes it to `sink`.
      *
      * NOTE(review): presumably the internal counterpart of the public
      * uloc_getName() (full locale name) -- confirm against the implementation.
      *
      * @param err in/out error code.
      */
     83 U_COMMON_API icu::CharString
     84 ulocimp_getName(std::string_view localeID,
     85                UErrorCode& err);
     86 
     87 U_COMMON_API void
     88 ulocimp_getName(std::string_view localeID,
     89                icu::ByteSink& sink,
     90                UErrorCode& err);
     91 
     /**
      * Two overloads taking the same locale ID: the first returns its result as
      * an icu::CharString, the second writes it to `sink`.
      *
      * NOTE(review): presumably the internal counterpart of the public
      * uloc_getBaseName() (locale name without keywords) -- confirm against the
      * implementation.
      *
      * @param err in/out error code.
      */
     92 U_COMMON_API icu::CharString
     93 ulocimp_getBaseName(std::string_view localeID,
     94                    UErrorCode& err);
     95 
     96 U_COMMON_API void
     97 ulocimp_getBaseName(std::string_view localeID,
     98                    icu::ByteSink& sink,
     99                    UErrorCode& err);
    100 
    /**
     * Two overloads taking the same locale ID: the first returns its result as
     * an icu::CharString, the second writes it to `sink`.
     *
     * NOTE(review): presumably the internal counterpart of the public
     * uloc_canonicalize() -- confirm against the implementation.
     *
     * @param err in/out error code.
     */
    101 U_COMMON_API icu::CharString
    102 ulocimp_canonicalize(std::string_view localeID,
    103                     UErrorCode& err);
    104 
    105 U_COMMON_API void
    106 ulocimp_canonicalize(std::string_view localeID,
    107                     icu::ByteSink& sink,
    108                     UErrorCode& err);
    109 
    /**
     * Two overloads taking the same inputs: the first returns its result as an
     * icu::CharString, the second writes it to `sink`.
     *
     * Unlike most functions in this header, `localeID` is a `const char*`
     * rather than a std::string_view (presumably NUL-terminated -- confirm).
     *
     * NOTE(review): presumably yields the value of keyword `keywordName` in
     * `localeID`; the result for a missing keyword is not visible in this
     * header -- confirm against the implementation.
     *
     * @param status in/out error code.
     */
    110 U_COMMON_API icu::CharString
    111 ulocimp_getKeywordValue(const char* localeID,
    112                        std::string_view keywordName,
    113                        UErrorCode& status);
    114 
    115 U_COMMON_API void
    116 ulocimp_getKeywordValue(const char* localeID,
    117                        std::string_view keywordName,
    118                        icu::ByteSink& sink,
    119                        UErrorCode& status);
    120 
    /**
     * Four accessors with identical signatures: each takes a locale ID and an
     * in/out error code and returns an icu::CharString.
     *
     * NOTE(review): by their names they return the language, script, region
     * and variant subtag of `localeID` respectively; the result when the
     * subtag is absent is not visible in this header -- confirm against the
     * implementation.
     */
    121 U_COMMON_API icu::CharString
    122 ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
    123 
    124 U_COMMON_API icu::CharString
    125 ulocimp_getScript(std::string_view localeID, UErrorCode& status);
    126 
    127 U_COMMON_API icu::CharString
    128 ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
    129 
    130 U_COMMON_API icu::CharString
    131 ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
    132 
    /**
     * Overload operating on a whole locale ID held in an icu::CharString.
     * `localeID` is taken by non-const reference and nothing is returned.
     *
     * NOTE(review): presumably `localeID` is updated in place so that keyword
     * `keywordName` has value `keywordValue`; the effect of an empty
     * `keywordValue` is not visible in this header -- confirm against the
     * implementation.
     *
     * @param status in/out error code.
     */
    133 U_COMMON_API void
    134 ulocimp_setKeywordValue(std::string_view keywordName,
    135                        std::string_view keywordValue,
    136                        icu::CharString& localeID,
    137                        UErrorCode& status);
    138 
    /**
     * Overload that takes `keywords` as a separate read-only view, writes its
     * output to `sink` and returns an int32_t.
     *
     * NOTE(review): the meaning of the returned int32_t (presumably a length)
     * and the exact content expected in `keywords` are not visible in this
     * header -- confirm against the implementation.
     *
     * @param status in/out error code.
     */
    139 U_COMMON_API int32_t
    140 ulocimp_setKeywordValue(std::string_view keywords,
    141                        std::string_view keywordName,
    142                        std::string_view keywordValue,
    143                        icu::ByteSink& sink,
    144                        UErrorCode& status);
    145 
    /**
     * Splits a locale ID into subtags, delivering each one through its own
     * output pointer. This overload uses icu::CharString outputs.
     *
     * NOTE(review): presumably a null output pointer means "not interested in
     * this subtag", and `*pEnd` (when `pEnd` is non-null) receives the position
     * in `localeID` where parsing stopped -- confirm against the
     * implementation.
     *
     * @param status in/out error code.
     */
    146 U_COMMON_API void
    147 ulocimp_getSubtags(
    148        std::string_view localeID,
    149        icu::CharString* language,
    150        icu::CharString* script,
    151        icu::CharString* region,
    152        icu::CharString* variant,
    153        const char** pEnd,
    154        UErrorCode& status);
    155 
    /**
     * Same parameter list as the overload above, but with icu::ByteSink
     * outputs. Null sinks are valid inputs: the inline std::nullptr_t overload
     * in this header calls this function with all four sinks null.
     */
    156 U_COMMON_API void
    157 ulocimp_getSubtags(
    158        std::string_view localeID,
    159        icu::ByteSink* language,
    160        icu::ByteSink* script,
    161        icu::ByteSink* region,
    162        icu::ByteSink* variant,
    163        const char** pEnd,
    164        UErrorCode& status);
    165 
    /**
     * Overload selected when the caller passes a literal `nullptr` for all
     * four subtag outputs.
     *
     * A bare `nullptr` converts equally well to icu::CharString* and to
     * icu::ByteSink*, so such a call would be ambiguous between the two
     * exported overloads. This wrapper resolves it by forwarding to the
     * icu::ByteSink* overload with explicitly typed null sinks; `localeID`,
     * `pEnd` and `status` are passed through unchanged.
     */
    166 inline void
    167 ulocimp_getSubtags(
    168        std::string_view localeID,
    169        std::nullptr_t,
    170        std::nullptr_t,
    171        std::nullptr_t,
    172        std::nullptr_t,
    173        const char** pEnd,
    174        UErrorCode& status) {
    175    ulocimp_getSubtags(
    176            localeID,
    177            static_cast<icu::ByteSink*>(nullptr),
    178            static_cast<icu::ByteSink*>(nullptr),
    179            static_cast<icu::ByteSink*>(nullptr),
    180            static_cast<icu::ByteSink*>(nullptr),
    181            pEnd,
    182            status);
    183 }
    184 
    /**
     * Two overloads taking the same locale ID: the first returns its result as
     * an icu::CharString, the second writes it to `sink`.
     *
     * `localeID` is a `const char*` here (presumably NUL-terminated -- confirm).
     *
     * NOTE(review): presumably the internal counterpart of the public
     * uloc_getParent() -- confirm against the implementation.
     *
     * @param err in/out error code.
     */
    185 U_COMMON_API icu::CharString
    186 ulocimp_getParent(const char* localeID,
    187                  UErrorCode& err);
    188 
    189 U_COMMON_API void
    190 ulocimp_getParent(const char* localeID,
    191                  icu::ByteSink& sink,
    192                  UErrorCode& err);
    193 
    /**
     * Overload of ulocimp_toLanguageTag() that returns an icu::CharString
     * instead of writing to a sink.
     *
     * NOTE(review): presumably `strict` and the error behaviour match the
     * icu::ByteSink overload declared in this header -- confirm against the
     * implementation.
     *
     * @param status in/out error code.
     */
    194 U_COMMON_API icu::CharString
    195 ulocimp_toLanguageTag(const char* localeID,
    196                      bool strict,
    197                      UErrorCode& status);
    198 
    199 /**
    200 * Writes a well-formed language tag for this locale ID.
    201 *
    202 * **Note**: When `strict` is false, any locale fields which do not satisfy the
    203 * BCP47 syntax requirement will be omitted from the result.  When `strict` is
    204 * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
    205 * fields do not satisfy the BCP47 syntax requirement.
    206 *
    207 * @param localeID  the input locale ID
    208 * @param sink      the output sink receiving the BCP47 language
    209 *                  tag for this Locale.
    210 * @param strict    boolean value indicating if the function returns
    211 *                  an error for an ill-formed input locale ID.
    212 * @param err       error information if receiving the language
    213 *                  tag failed.
    214 * (No return value: the language tag is written to `sink`.)
    215 *
    216 * @internal ICU 64
    217 */
    218 U_COMMON_API void
    219 ulocimp_toLanguageTag(const char* localeID,
    220                      icu::ByteSink& sink,
    221                      bool strict,
    222                      UErrorCode& err);
    223 
    /**
     * Overload of ulocimp_forLanguageTag() that returns an icu::CharString
     * instead of writing to a sink.
     *
     * NOTE(review): presumably `langtag`, `tagLen` and `parsedLength` behave
     * as documented on the icu::ByteSink overload declared in this header --
     * confirm against the implementation.
     *
     * @param status in/out error code.
     */
    224 U_COMMON_API icu::CharString
    225 ulocimp_forLanguageTag(const char* langtag,
    226                       int32_t tagLen,
    227                       int32_t* parsedLength,
    228                       UErrorCode& status);
    229 
    230 /**
    231 * Returns a locale ID for the specified BCP47 language tag string.
    232 * If the specified language tag contains any ill-formed subtags,
    233 * the first such subtag and all following subtags are ignored.
    234 * <p>
    235 * This implements the 'Language-Tag' production of BCP 47, and so
    236 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
    237 * (regular and irregular) as well as private use language tags.
    238 *
    239 * Private use tags are represented as 'x-whatever',
    240 * and legacy tags are converted to their canonical replacements where they exist.
    241 *
    242 * Note that a few legacy tags have no modern replacement;
    243 * these will be converted using the fallback described in
    244 * the first paragraph, so some information might be lost.
    245 *
    246 * @param langtag   the input BCP47 language tag.
    247 * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
    248 * @param sink      the output sink receiving a locale ID for the
    249 *                  specified BCP47 language tag.
    250 * @param parsedLength  if not NULL, successfully parsed length
    251 *                      for the input language tag is set.
    252 * @param err       error information if receiving the locale ID
    253 *                  failed.
    254 * @internal ICU 63
    255 */
    256 U_COMMON_API void
    257 ulocimp_forLanguageTag(const char* langtag,
    258                       int32_t tagLen,
    259                       icu::ByteSink& sink,
    260                       int32_t* parsedLength,
    261                       UErrorCode& err);
    262 
    263 /**
    264 * Get the region to use for supplemental data lookup. Uses
    265 * (1) any region specified by locale tag "rg"; if none then
    266 * (2) any unicode_region_tag in the locale ID; if none then
    267 * (3) if inferRegion is true, the region suggested by
    268 * getLikelySubtags on the localeID.
    269 * If no region is found, returns an empty string.
    270 *
    271 * @param localeID
    272 *     The complete locale ID (with keywords) from which
    273 *     to get the region to use for supplemental data.
    274 * @param inferRegion
    275 *     If true, will try to infer region from localeID if
    276 *     no other region is found.
    277 * @param status
    278 *     Reference to in/out UErrorCode value for latest status.
    279 * @return
    280 *     The region code found, empty if none found.
    281 * @internal ICU 57
    282 */
    283 U_COMMON_API icu::CharString
    284 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
    285                                     UErrorCode& status);
    286 
    /**
     * Overload of ulocimp_addLikelySubtags() that returns an icu::CharString
     * instead of writing to a sink.
     *
     * NOTE(review): presumably the algorithm and error conditions match the
     * icu::ByteSink overload declared in this header -- confirm against the
     * implementation.
     *
     * @param status in/out error code.
     */
    287 U_COMMON_API icu::CharString
    288 ulocimp_addLikelySubtags(const char* localeID,
    289                         UErrorCode& status);
    290 
    291 /**
    292 * Add the likely subtags for a provided locale ID, per the algorithm described
    293 * in the following CLDR technical report:
    294 *
    295 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
    296 *
    297 * If localeID is already in the maximal form, or there is no data available
    298 * for maximization, it will be copied to the output sink.  For example,
    299 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
    300 *
    301 * Examples:
    302 *
    303 * "en" maximizes to "en_Latn_US"
    304 *
    305 * "de" maximizes to "de_Latn_DE"
    306 *
    307 * "sr" maximizes to "sr_Cyrl_RS"
    308 *
    309 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
    310 *
    311 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
    312 *
    313 * @param localeID The locale to maximize
    314 * @param sink The output sink receiving the maximized locale
    315 * @param err Error information if maximizing the locale failed.  If the length
    316 * of the localeID and the null-terminator is greater than the maximum allowed size,
    317 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
    318 * @internal ICU 64
    319 */
    320 U_COMMON_API void
    321 ulocimp_addLikelySubtags(const char* localeID,
    322                         icu::ByteSink& sink,
    323                         UErrorCode& err);
    324 
    /**
     * Overload of ulocimp_minimizeSubtags() that returns an icu::CharString
     * instead of writing to a sink.
     *
     * NOTE(review): presumably `favorScript` and the error conditions match
     * the icu::ByteSink overload declared in this header -- confirm against
     * the implementation.
     *
     * @param status in/out error code.
     */
    325 U_COMMON_API icu::CharString
    326 ulocimp_minimizeSubtags(const char* localeID,
    327                        bool favorScript,
    328                        UErrorCode& status);
    329 
    330 /**
    331 * Minimize the subtags for a provided locale ID, per the algorithm described
    332 * in the following CLDR technical report:
    333 *
    334 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
    335 *
    336 * If localeID is already in the minimal form, or there is no data available
    337 * for minimization, it will be copied to the output sink.  Since the
    338 * minimization algorithm relies on proper maximization, see the comments
    339 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
    340 *
    341 * Examples:
    342 *
    343 * "en_Latn_US" minimizes to "en"
    344 *
    345 * "de_Latn_DE" minimizes to "de"
    346 *
    347 * "sr_Cyrl_RS" minimizes to "sr"
    348 *
    349 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
    350 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
    351 *
    352 * @param localeID The locale to minimize
    353 * @param sink The output sink receiving the minimized locale
    354 * @param favorScript favor to keep script if true, region if false.
    355 * @param err Error information if minimizing the locale failed.  If the length
    356 * of the localeID and the null-terminator is greater than the maximum allowed size,
    357 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
    358 * @internal ICU 64
    359 */
    360 U_COMMON_API void
    361 ulocimp_minimizeSubtags(const char* localeID,
    362                        icu::ByteSink& sink,
    363                        bool favorScript,
    364                        UErrorCode& err);
    365 
    /**
     * Takes a locale ID view and returns a `const char*`.
     * NOTE(review): by its name, presumably returns a pointer into `localeID`
     * at the point where its keyword section begins, and a null pointer when
     * there are no keywords -- confirm against the implementation.
     */
    366 U_CAPI const char * U_EXPORT2
    367 locale_getKeywordsStart(std::string_view localeID);
    368 
    /**
     * Family of boolean predicates that all share the signature
     * `(const char* s, int32_t len)`. Unlike most declarations in this header
     * they carry no U_COMMON_API / U_CAPI export annotation.
     *
     * NOTE(review): by their names, each presumably reports whether the `len`
     * bytes starting at `s` are well-formed as the named kind of BCP 47
     * subtag (or subtag sequence, for the plural "Subtags"/"Attributes"
     * forms). Whether a negative `len` is accepted (e.g. meaning
     * "NUL-terminated") is not visible in this header -- confirm against the
     * implementation.
     */
    369 bool
    370 ultag_isExtensionSubtags(const char* s, int32_t len);
    371 
    372 bool
    373 ultag_isLanguageSubtag(const char* s, int32_t len);
    374 
    375 bool
    376 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
    377 
    378 bool
    379 ultag_isRegionSubtag(const char* s, int32_t len);
    380 
    381 bool
    382 ultag_isScriptSubtag(const char* s, int32_t len);
    383 
    384 bool
    385 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
    386 
    387 bool
    388 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
    389 
    390 bool
    391 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
    392 
    393 bool
    394 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
    395 
    396 bool
    397 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
    398 
    399 bool
    400 ultag_isUnicodeLocaleType(const char* s, int32_t len);
    401 
    402 bool
    403 ultag_isVariantSubtags(const char* s, int32_t len);
    404 
    /**
     * Takes a locale ID string and returns a `const char*`.
     * NOTE(review): by its name, presumably returns a pointer into `localeID`
     * at the first "tkey" of a transformed ("-t-") extension, and a null
     * pointer when there is none -- confirm against the implementation.
     */
    405 const char*
    406 ultag_getTKeyStart(const char* localeID);
    407 
    /**
     * Keyword key/type conversion helpers. Their names match the
     * "...WithFallback" functions declared earlier in this header, minus that
     * suffix.
     *
     * The key functions take one std::string_view (`key`); the type functions
     * take two (`key` and `type`). All four return
     * std::optional<std::string_view>.
     *
     * NOTE(review): presumably these perform the table lookup only and yield
     * std::nullopt when no mapping is known; the lifetime of the returned view
     * is not visible in this header -- confirm against the implementation.
     */
    408 U_COMMON_API std::optional<std::string_view>
    409 ulocimp_toBcpKey(std::string_view key);
    410 
    411 U_COMMON_API std::optional<std::string_view>
    412 ulocimp_toLegacyKey(std::string_view key);
    413 
    414 U_COMMON_API std::optional<std::string_view>
    415 ulocimp_toBcpType(std::string_view key, std::string_view type);
    416 
    417 U_COMMON_API std::optional<std::string_view>
    418 ulocimp_toLegacyType(std::string_view key, std::string_view type);
    419 
    420 /*
        * Function for testing purposes only.
        * Returns an array of C strings; `length` is an output parameter.
        * NOTE(review): presumably the array lists the locale names known to be
        * already canonicalized and `length` receives the number of entries --
        * confirm against the implementation.
        */
    421 U_COMMON_API const char* const*
    422 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
    423 
    424 // Return true if the value is already canonicalized.
        // As the "ForTest" suffix indicates, this is intended for test code.
    425 U_COMMON_API bool
    426 ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
    427 
    428 #ifdef __cplusplus
    429 U_NAMESPACE_BEGIN
    /**
     * UObject subclass holding a fixed array of 22 uint32_t words and offering
     * a per-region query (isSet) and a whole-map comparison (equals).
     *
     * NOTE(review): presumably a bitmap with one bit per two-letter (A-Z, A-Z)
     * region code, where value() turns a region string into a bit index --
     * confirm against the implementation, including what value() returns for
     * a region that is not two ASCII letters.
     */
    430 class U_COMMON_API RegionValidateMap : public UObject {
    431 public:
    432  RegionValidateMap();
    433  virtual ~RegionValidateMap();
         // Queries the map for `region`.
    434  bool isSet(const char* region) const;
         // Compares this map with `that`.
    435  bool equals(const RegionValidateMap& that) const;
    436 protected:
         // Maps `region` to an integer; presumably an index into `map` (TODO confirm).
    437  int32_t value(const char* region) const;
    438  uint32_t map[22]; // 26x26/32 = 21.125, rounded up to 22 words
    439 };
    440 U_NAMESPACE_END
    441 #endif /* __cplusplus */
    442 
    443 #endif