ulocimp.h (14359B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2004-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 #ifndef ULOCIMP_H 11 #define ULOCIMP_H 12 13 #include <cstddef> 14 #include <optional> 15 #include <string_view> 16 17 #include "unicode/bytestream.h" 18 #include "unicode/uloc.h" 19 20 #include "charstr.h" 21 22 /** 23 * Create an iterator over the specified keywords list 24 * @param keywordList double-null terminated list. Will be copied. 25 * @param keywordListSize size in bytes of keywordList 26 * @param status err code 27 * @return enumeration (owned by caller) of the keyword list. 28 * @internal ICU 3.0 29 */ 30 U_CAPI UEnumeration* U_EXPORT2 31 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); 32 33 /** 34 * Look up a resource bundle table item with fallback on the table level. 35 * This is accessible so it can be called by C++ code. 36 */ 37 U_CAPI const UChar * U_EXPORT2 38 uloc_getTableStringWithFallback( 39 const char *path, 40 const char *locale, 41 const char *tableKey, 42 const char *subTableKey, 43 const char *itemKey, 44 int32_t *pLength, 45 UErrorCode *pErrorCode); 46 47 namespace { 48 /*returns true if a is an ID separator false otherwise*/ 49 inline bool _isIDSeparator(char a) { return a == '_' || a == '-'; } 50 } // namespace 51 52 U_CFUNC const char* 53 uloc_getCurrentCountryID(const char* oldID); 54 55 U_CFUNC const char* 56 uloc_getCurrentLanguageID(const char* oldID); 57 58 U_COMMON_API std::optional<std::string_view> 59 ulocimp_toBcpKeyWithFallback(std::string_view keyword); 60 61 U_COMMON_API std::optional<std::string_view> 62 ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value); 63 64 U_COMMON_API std::optional<std::string_view> 65 ulocimp_toLegacyKeyWithFallback(std::string_view keyword); 66 67 U_COMMON_API std::optional<std::string_view> 68 ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value); 69 70 U_COMMON_API icu::CharString 71 ulocimp_getKeywords(std::string_view localeID, 72 char prev, 73 bool valuesToo, 74 UErrorCode& status); 75 76 U_COMMON_API void 77 ulocimp_getKeywords(std::string_view localeID, 78 char prev, 79 icu::ByteSink& sink, 80 bool valuesToo, 81 UErrorCode& status); 82 83 U_COMMON_API icu::CharString 84 ulocimp_getName(std::string_view localeID, 85 UErrorCode& err); 86 87 U_COMMON_API void 88 ulocimp_getName(std::string_view localeID, 89 icu::ByteSink& sink, 90 UErrorCode& err); 91 92 U_COMMON_API icu::CharString 93 ulocimp_getBaseName(std::string_view localeID, 94 UErrorCode& err); 95 96 U_COMMON_API void 97 ulocimp_getBaseName(std::string_view localeID, 98 icu::ByteSink& sink, 99 UErrorCode& err); 100 101 U_COMMON_API icu::CharString 102 ulocimp_canonicalize(std::string_view localeID, 103 UErrorCode& err); 104 105 U_COMMON_API void 106 ulocimp_canonicalize(std::string_view localeID, 107 icu::ByteSink& sink, 108 UErrorCode& err); 109 110 U_COMMON_API icu::CharString 111 ulocimp_getKeywordValue(const char* localeID, 112 std::string_view keywordName, 113 UErrorCode& status); 114 115 U_COMMON_API void 116 ulocimp_getKeywordValue(const char* localeID, 117 std::string_view keywordName, 118 icu::ByteSink& sink, 119 UErrorCode& status); 120 121 U_COMMON_API icu::CharString 122 ulocimp_getLanguage(std::string_view localeID, UErrorCode& status); 123 124 U_COMMON_API icu::CharString 125 ulocimp_getScript(std::string_view localeID, UErrorCode& status); 126 127 U_COMMON_API icu::CharString 128 ulocimp_getRegion(std::string_view localeID, UErrorCode& status); 129 130 U_COMMON_API icu::CharString 131 ulocimp_getVariant(std::string_view localeID, UErrorCode& status); 132 133 U_COMMON_API void 134 ulocimp_setKeywordValue(std::string_view keywordName, 135 std::string_view keywordValue, 136 icu::CharString& localeID, 137 UErrorCode& status); 138 139 U_COMMON_API int32_t 140 ulocimp_setKeywordValue(std::string_view keywords, 141 std::string_view keywordName, 142 std::string_view keywordValue, 143 icu::ByteSink& sink, 144 UErrorCode& status); 145 146 U_COMMON_API void 147 ulocimp_getSubtags( 148 std::string_view localeID, 149 icu::CharString* language, 150 icu::CharString* script, 151 icu::CharString* region, 152 icu::CharString* variant, 153 const char** pEnd, 154 UErrorCode& status); 155 156 U_COMMON_API void 157 ulocimp_getSubtags( 158 std::string_view localeID, 159 icu::ByteSink* language, 160 icu::ByteSink* script, 161 icu::ByteSink* region, 162 icu::ByteSink* variant, 163 const char** pEnd, 164 UErrorCode& status); 165 166 inline void 167 ulocimp_getSubtags( 168 std::string_view localeID, 169 std::nullptr_t, 170 std::nullptr_t, 171 std::nullptr_t, 172 std::nullptr_t, 173 const char** pEnd, 174 UErrorCode& status) { 175 ulocimp_getSubtags( 176 localeID, 177 static_cast<icu::ByteSink*>(nullptr), 178 static_cast<icu::ByteSink*>(nullptr), 179 static_cast<icu::ByteSink*>(nullptr), 180 static_cast<icu::ByteSink*>(nullptr), 181 pEnd, 182 status); 183 } 184 185 U_COMMON_API icu::CharString 186 ulocimp_getParent(const char* localeID, 187 UErrorCode& err); 188 189 U_COMMON_API void 190 ulocimp_getParent(const char* localeID, 191 icu::ByteSink& sink, 192 UErrorCode& err); 193 194 U_COMMON_API icu::CharString 195 ulocimp_toLanguageTag(const char* localeID, 196 bool strict, 197 UErrorCode& status); 198 199 /** 200 * Writes a well-formed language tag for this locale ID. 201 * 202 * **Note**: When `strict` is false, any locale fields which do not satisfy the 203 * BCP47 syntax requirement will be omitted from the result. When `strict` is 204 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale 205 * fields do not satisfy the BCP47 syntax requirement. 206 * 207 * @param localeID the input locale ID 208 * @param sink the output sink receiving the BCP47 language 209 * tag for this Locale. 210 * @param strict boolean value indicating if the function returns 211 * an error for an ill-formed input locale ID. 212 * @param err error information if receiving the language 213 * tag failed. 214 * @return The length of the BCP47 language tag. 215 * 216 * @internal ICU 64 217 */ 218 U_COMMON_API void 219 ulocimp_toLanguageTag(const char* localeID, 220 icu::ByteSink& sink, 221 bool strict, 222 UErrorCode& err); 223 224 U_COMMON_API icu::CharString 225 ulocimp_forLanguageTag(const char* langtag, 226 int32_t tagLen, 227 int32_t* parsedLength, 228 UErrorCode& status); 229 230 /** 231 * Returns a locale ID for the specified BCP47 language tag string. 232 * If the specified language tag contains any ill-formed subtags, 233 * the first such subtag and all following subtags are ignored. 234 * <p> 235 * This implements the 'Language-Tag' production of BCP 47, and so 236 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) 237 * (regular and irregular) as well as private use language tags. 238 * 239 * Private use tags are represented as 'x-whatever', 240 * and legacy tags are converted to their canonical replacements where they exist. 241 * 242 * Note that a few legacy tags have no modern replacement; 243 * these will be converted using the fallback described in 244 * the first paragraph, so some information might be lost. 245 * 246 * @param langtag the input BCP47 language tag. 247 * @param tagLen the length of langtag, or -1 to call uprv_strlen(). 248 * @param sink the output sink receiving a locale ID for the 249 * specified BCP47 language tag. 250 * @param parsedLength if not NULL, successfully parsed length 251 * for the input language tag is set. 252 * @param err error information if receiving the locald ID 253 * failed. 254 * @internal ICU 63 255 */ 256 U_COMMON_API void 257 ulocimp_forLanguageTag(const char* langtag, 258 int32_t tagLen, 259 icu::ByteSink& sink, 260 int32_t* parsedLength, 261 UErrorCode& err); 262 263 /** 264 * Get the region to use for supplemental data lookup. Uses 265 * (1) any region specified by locale tag "rg"; if none then 266 * (2) any unicode_region_tag in the locale ID; if none then 267 * (3) if inferRegion is true, the region suggested by 268 * getLikelySubtags on the localeID. 269 * If no region is found, returns an empty string. 270 * 271 * @param localeID 272 * The complete locale ID (with keywords) from which 273 * to get the region to use for supplemental data. 274 * @param inferRegion 275 * If true, will try to infer region from localeID if 276 * no other region is found. 277 * @param status 278 * Pointer to in/out UErrorCode value for latest status. 279 * @return 280 * The region code found, empty if none found. 281 * @internal ICU 57 282 */ 283 U_COMMON_API icu::CharString 284 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion, 285 UErrorCode& status); 286 287 U_COMMON_API icu::CharString 288 ulocimp_addLikelySubtags(const char* localeID, 289 UErrorCode& status); 290 291 /** 292 * Add the likely subtags for a provided locale ID, per the algorithm described 293 * in the following CLDR technical report: 294 * 295 * http://www.unicode.org/reports/tr35/#Likely_Subtags 296 * 297 * If localeID is already in the maximal form, or there is no data available 298 * for maximization, it will be copied to the output buffer. For example, 299 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. 300 * 301 * Examples: 302 * 303 * "en" maximizes to "en_Latn_US" 304 * 305 * "de" maximizes to "de_Latn_US" 306 * 307 * "sr" maximizes to "sr_Cyrl_RS" 308 * 309 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) 310 * 311 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) 312 * 313 * @param localeID The locale to maximize 314 * @param sink The output sink receiving the maximized locale 315 * @param err Error information if maximizing the locale failed. If the length 316 * of the localeID and the null-terminator is greater than the maximum allowed size, 317 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. 318 * @internal ICU 64 319 */ 320 U_COMMON_API void 321 ulocimp_addLikelySubtags(const char* localeID, 322 icu::ByteSink& sink, 323 UErrorCode& err); 324 325 U_COMMON_API icu::CharString 326 ulocimp_minimizeSubtags(const char* localeID, 327 bool favorScript, 328 UErrorCode& status); 329 330 /** 331 * Minimize the subtags for a provided locale ID, per the algorithm described 332 * in the following CLDR technical report: 333 * 334 * http://www.unicode.org/reports/tr35/#Likely_Subtags 335 * 336 * If localeID is already in the minimal form, or there is no data available 337 * for minimization, it will be copied to the output buffer. Since the 338 * minimization algorithm relies on proper maximization, see the comments 339 * for ulocimp_addLikelySubtags for reasons why there might not be any data. 340 * 341 * Examples: 342 * 343 * "en_Latn_US" minimizes to "en" 344 * 345 * "de_Latn_US" minimizes to "de" 346 * 347 * "sr_Cyrl_RS" minimizes to "sr" 348 * 349 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the 350 * script, and minimizing to "zh" would imply "zh_Hans_CN".) 351 * 352 * @param localeID The locale to minimize 353 * @param sink The output sink receiving the maximized locale 354 * @param favorScript favor to keep script if true, region if false. 355 * @param err Error information if minimizing the locale failed. If the length 356 * of the localeID and the null-terminator is greater than the maximum allowed size, 357 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. 358 * @internal ICU 64 359 */ 360 U_COMMON_API void 361 ulocimp_minimizeSubtags(const char* localeID, 362 icu::ByteSink& sink, 363 bool favorScript, 364 UErrorCode& err); 365 366 U_CAPI const char * U_EXPORT2 367 locale_getKeywordsStart(std::string_view localeID); 368 369 bool 370 ultag_isExtensionSubtags(const char* s, int32_t len); 371 372 bool 373 ultag_isLanguageSubtag(const char* s, int32_t len); 374 375 bool 376 ultag_isPrivateuseValueSubtags(const char* s, int32_t len); 377 378 bool 379 ultag_isRegionSubtag(const char* s, int32_t len); 380 381 bool 382 ultag_isScriptSubtag(const char* s, int32_t len); 383 384 bool 385 ultag_isTransformedExtensionSubtags(const char* s, int32_t len); 386 387 bool 388 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); 389 390 bool 391 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); 392 393 bool 394 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); 395 396 bool 397 ultag_isUnicodeLocaleKey(const char* s, int32_t len); 398 399 bool 400 ultag_isUnicodeLocaleType(const char* s, int32_t len); 401 402 bool 403 ultag_isVariantSubtags(const char* s, int32_t len); 404 405 const char* 406 ultag_getTKeyStart(const char* localeID); 407 408 U_COMMON_API std::optional<std::string_view> 409 ulocimp_toBcpKey(std::string_view key); 410 411 U_COMMON_API std::optional<std::string_view> 412 ulocimp_toLegacyKey(std::string_view key); 413 414 U_COMMON_API std::optional<std::string_view> 415 ulocimp_toBcpType(std::string_view key, std::string_view type); 416 417 U_COMMON_API std::optional<std::string_view> 418 ulocimp_toLegacyType(std::string_view key, std::string_view type); 419 420 /* Function for testing purpose */ 421 U_COMMON_API const char* const* 422 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length); 423 424 // Return true if the value is already canonicalized. 425 U_COMMON_API bool 426 ulocimp_isCanonicalizedLocaleForTest(const char* localeName); 427 428 #ifdef __cplusplus 429 U_NAMESPACE_BEGIN 430 class U_COMMON_API RegionValidateMap : public UObject { 431 public: 432 RegionValidateMap(); 433 virtual ~RegionValidateMap(); 434 bool isSet(const char* region) const; 435 bool equals(const RegionValidateMap& that) const; 436 protected: 437 int32_t value(const char* region) const; 438 uint32_t map[22]; // 26x26/32 = 22; 439 }; 440 U_NAMESPACE_END 441 #endif /* __cplusplus */ 442 443 #endif