localebuilder.cpp (15252B)
1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include <optional> 5 #include <string_view> 6 #include <utility> 7 8 #include "bytesinkutil.h" // StringByteSink<CharString> 9 #include "charstr.h" 10 #include "cstring.h" 11 #include "fixedstring.h" 12 #include "ulocimp.h" 13 #include "unicode/localebuilder.h" 14 #include "unicode/locid.h" 15 16 namespace { 17 18 inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; } 19 inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); } 20 21 constexpr const char* kAttributeKey = "attribute"; 22 23 bool _isExtensionSubtags(char key, const char* s, int32_t len) { 24 switch (uprv_tolower(key)) { 25 case 'u': 26 return ultag_isUnicodeExtensionSubtags(s, len); 27 case 't': 28 return ultag_isTransformedExtensionSubtags(s, len); 29 case 'x': 30 return ultag_isPrivateuseValueSubtags(s, len); 31 default: 32 return ultag_isExtensionSubtags(s, len); 33 } 34 } 35 36 } // namespace 37 38 U_NAMESPACE_BEGIN 39 40 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), 41 script_(), region_(), variant_(nullptr), extensions_(nullptr) 42 { 43 language_[0] = 0; 44 script_[0] = 0; 45 region_[0] = 0; 46 } 47 48 LocaleBuilder::~LocaleBuilder() 49 { 50 delete variant_; 51 delete extensions_; 52 } 53 54 LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) 55 { 56 clear(); 57 setLanguage(locale.getLanguage()); 58 setScript(locale.getScript()); 59 setRegion(locale.getCountry()); 60 setVariant(locale.getVariant()); 61 extensions_ = locale.clone(); 62 if (extensions_ == nullptr) { 63 status_ = U_MEMORY_ALLOCATION_ERROR; 64 } 65 return *this; 66 } 67 68 LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) 69 { 70 Locale l = Locale::forLanguageTag(tag, status_); 71 if (U_FAILURE(status_)) { return *this; } 72 // Because setLocale will reset status_ we need to return 73 // first if we have error in forLanguageTag. 74 setLocale(l); 75 return *this; 76 } 77 78 namespace { 79 80 void setField(StringPiece input, char* dest, UErrorCode& errorCode, 81 bool (*test)(const char*, int32_t)) { 82 if (U_FAILURE(errorCode)) { return; } 83 if (input.empty()) { 84 dest[0] = '\0'; 85 } else if (test(input.data(), input.length())) { 86 uprv_memcpy(dest, input.data(), input.length()); 87 dest[input.length()] = '\0'; 88 } else { 89 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 90 } 91 } 92 93 } // namespace 94 95 LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) 96 { 97 setField(language, language_, status_, &ultag_isLanguageSubtag); 98 return *this; 99 } 100 101 LocaleBuilder& LocaleBuilder::setScript(StringPiece script) 102 { 103 setField(script, script_, status_, &ultag_isScriptSubtag); 104 return *this; 105 } 106 107 LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) 108 { 109 setField(region, region_, status_, &ultag_isRegionSubtag); 110 return *this; 111 } 112 113 namespace { 114 115 void transform(char* data, int32_t len) { 116 for (int32_t i = 0; i < len; i++, data++) { 117 if (*data == '_') { 118 *data = '-'; 119 } else { 120 *data = uprv_tolower(*data); 121 } 122 } 123 } 124 125 } // namespace 126 127 LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) 128 { 129 if (U_FAILURE(status_)) { return *this; } 130 if (variant.empty()) { 131 delete variant_; 132 variant_ = nullptr; 133 return *this; 134 } 135 FixedString* new_variant = new FixedString(variant); 136 if (new_variant == nullptr || new_variant->isEmpty()) { 137 status_ = U_MEMORY_ALLOCATION_ERROR; 138 return *this; 139 } 140 transform(new_variant->getAlias(), variant.length()); 141 if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) { 142 delete new_variant; 143 status_ = U_ILLEGAL_ARGUMENT_ERROR; 144 return *this; 145 } 146 delete variant_; 147 variant_ = new_variant; 148 return *this; 149 } 150 151 namespace { 152 153 bool 154 _isKeywordValue(const char* key, const char* value, int32_t value_len) 155 { 156 if (key[1] == '\0') { 157 // one char key 158 return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && 159 _isExtensionSubtags(key[0], value, value_len)); 160 } else if (uprv_strcmp(key, kAttributeKey) == 0) { 161 // unicode attributes 162 return ultag_isUnicodeLocaleAttributes(value, value_len); 163 } 164 // otherwise: unicode extension value 165 // We need to convert from legacy key/value to unicode 166 // key/value 167 std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key); 168 std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value); 169 170 return unicode_locale_key.has_value() && 171 unicode_locale_type.has_value() && 172 ultag_isUnicodeLocaleKey(unicode_locale_key->data(), 173 static_cast<int32_t>(unicode_locale_key->size())) && 174 ultag_isUnicodeLocaleType(unicode_locale_type->data(), 175 static_cast<int32_t>(unicode_locale_type->size())); 176 } 177 178 void 179 _copyExtensions(const Locale& from, icu::StringEnumeration *keywords, 180 Locale& to, bool validate, UErrorCode& errorCode) 181 { 182 if (U_FAILURE(errorCode)) { return; } 183 LocalPointer<icu::StringEnumeration> ownedKeywords; 184 if (keywords == nullptr) { 185 ownedKeywords.adoptInstead(from.createKeywords(errorCode)); 186 if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; } 187 keywords = ownedKeywords.getAlias(); 188 } 189 const char* key; 190 while ((key = keywords->next(nullptr, errorCode)) != nullptr) { 191 auto value = from.getKeywordValue<CharString>(key, errorCode); 192 if (U_FAILURE(errorCode)) { return; } 193 if (uprv_strcmp(key, kAttributeKey) == 0) { 194 transform(value.data(), value.length()); 195 } 196 if (validate && 197 !_isKeywordValue(key, value.data(), value.length())) { 198 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 199 return; 200 } 201 to.setKeywordValue(key, value.data(), errorCode); 202 if (U_FAILURE(errorCode)) { return; } 203 } 204 } 205 206 void 207 _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode) 208 { 209 if (U_FAILURE(errorCode)) { return; } 210 // Clear Unicode attributes 211 locale.setKeywordValue(kAttributeKey, "", errorCode); 212 213 // Clear all Unicode keyword values 214 LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode)); 215 if (U_FAILURE(errorCode) || iter.isNull()) { return; } 216 const char* key; 217 while ((key = iter->next(nullptr, errorCode)) != nullptr) { 218 locale.setUnicodeKeywordValue(key, nullptr, errorCode); 219 } 220 } 221 222 void 223 _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode) 224 { 225 if (U_FAILURE(errorCode)) { return; } 226 // Add the unicode extensions to extensions_ 227 CharString locale_str("und-u-", errorCode); 228 locale_str.append(value, errorCode); 229 _copyExtensions( 230 Locale::forLanguageTag(locale_str.data(), errorCode), nullptr, 231 locale, false, errorCode); 232 } 233 234 } // namespace 235 236 LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) 237 { 238 if (U_FAILURE(status_)) { return *this; } 239 if (!UPRV_ISALPHANUM(key)) { 240 status_ = U_ILLEGAL_ARGUMENT_ERROR; 241 return *this; 242 } 243 CharString value_str(value, status_); 244 if (U_FAILURE(status_)) { return *this; } 245 transform(value_str.data(), value_str.length()); 246 if (!value_str.isEmpty() && 247 !_isExtensionSubtags(key, value_str.data(), value_str.length())) { 248 status_ = U_ILLEGAL_ARGUMENT_ERROR; 249 return *this; 250 } 251 if (extensions_ == nullptr) { 252 extensions_ = Locale::getRoot().clone(); 253 if (extensions_ == nullptr) { 254 status_ = U_MEMORY_ALLOCATION_ERROR; 255 return *this; 256 } 257 } 258 if (uprv_tolower(key) != 'u') { 259 // for t, x and others extension. 260 extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), 261 status_); 262 return *this; 263 } 264 _clearUAttributesAndKeyType(*extensions_, status_); 265 if (U_FAILURE(status_)) { return *this; } 266 if (!value.empty()) { 267 _setUnicodeExtensions(*extensions_, value_str, status_); 268 } 269 return *this; 270 } 271 272 LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( 273 StringPiece key, StringPiece type) 274 { 275 if (U_FAILURE(status_)) { return *this; } 276 if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || 277 (!type.empty() && 278 !ultag_isUnicodeLocaleType(type.data(), type.length()))) { 279 status_ = U_ILLEGAL_ARGUMENT_ERROR; 280 return *this; 281 } 282 if (extensions_ == nullptr) { 283 extensions_ = Locale::getRoot().clone(); 284 if (extensions_ == nullptr) { 285 status_ = U_MEMORY_ALLOCATION_ERROR; 286 return *this; 287 } 288 } 289 extensions_->setUnicodeKeywordValue(key, type, status_); 290 return *this; 291 } 292 293 LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( 294 StringPiece value) 295 { 296 CharString value_str(value, status_); 297 if (U_FAILURE(status_)) { return *this; } 298 transform(value_str.data(), value_str.length()); 299 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { 300 status_ = U_ILLEGAL_ARGUMENT_ERROR; 301 return *this; 302 } 303 if (extensions_ == nullptr) { 304 extensions_ = Locale::getRoot().clone(); 305 if (extensions_ == nullptr) { 306 status_ = U_MEMORY_ALLOCATION_ERROR; 307 return *this; 308 } 309 extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); 310 return *this; 311 } 312 313 UErrorCode localErrorCode = U_ZERO_ERROR; 314 auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode); 315 if (U_FAILURE(localErrorCode)) { 316 CharString new_attributes(value_str.data(), status_); 317 // No attributes, set the attribute. 318 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); 319 return *this; 320 } 321 322 transform(attributes.data(),attributes.length()); 323 const char* start = attributes.data(); 324 const char* limit = attributes.data() + attributes.length(); 325 CharString new_attributes; 326 bool inserted = false; 327 while (start < limit) { 328 if (!inserted) { 329 int cmp = uprv_strcmp(start, value_str.data()); 330 if (cmp == 0) { return *this; } // Found it in attributes: Just return 331 if (cmp > 0) { 332 if (!new_attributes.isEmpty()) new_attributes.append('_', status_); 333 new_attributes.append(value_str.data(), status_); 334 inserted = true; 335 } 336 } 337 if (!new_attributes.isEmpty()) { 338 new_attributes.append('_', status_); 339 } 340 new_attributes.append(start, status_); 341 start += uprv_strlen(start) + 1; 342 } 343 if (!inserted) { 344 if (!new_attributes.isEmpty()) { 345 new_attributes.append('_', status_); 346 } 347 new_attributes.append(value_str.data(), status_); 348 } 349 // Not yet in the attributes, set the attribute. 350 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); 351 return *this; 352 } 353 354 LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( 355 StringPiece value) 356 { 357 CharString value_str(value, status_); 358 if (U_FAILURE(status_)) { return *this; } 359 transform(value_str.data(), value_str.length()); 360 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { 361 status_ = U_ILLEGAL_ARGUMENT_ERROR; 362 return *this; 363 } 364 if (extensions_ == nullptr) { return *this; } 365 UErrorCode localErrorCode = U_ZERO_ERROR; 366 auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode); 367 // get failure, just return 368 if (U_FAILURE(localErrorCode)) { return *this; } 369 // Do not have any attributes, just return. 370 if (attributes.isEmpty()) { return *this; } 371 372 char* p = attributes.data(); 373 // Replace null terminiator in place for _ and - so later 374 // we can use uprv_strcmp to compare. 375 for (int32_t i = 0; i < attributes.length(); i++, p++) { 376 *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); 377 } 378 379 const char* start = attributes.data(); 380 const char* limit = attributes.data() + attributes.length(); 381 CharString new_attributes; 382 bool found = false; 383 while (start < limit) { 384 if (uprv_strcmp(start, value_str.data()) == 0) { 385 found = true; 386 } else { 387 if (!new_attributes.isEmpty()) { 388 new_attributes.append('_', status_); 389 } 390 new_attributes.append(start, status_); 391 } 392 start += uprv_strlen(start) + 1; 393 } 394 // Found the value in attributes, set the attribute. 395 if (found) { 396 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); 397 } 398 return *this; 399 } 400 401 LocaleBuilder& LocaleBuilder::clear() 402 { 403 status_ = U_ZERO_ERROR; 404 language_[0] = 0; 405 script_[0] = 0; 406 region_[0] = 0; 407 delete variant_; 408 variant_ = nullptr; 409 clearExtensions(); 410 return *this; 411 } 412 413 LocaleBuilder& LocaleBuilder::clearExtensions() 414 { 415 delete extensions_; 416 extensions_ = nullptr; 417 return *this; 418 } 419 420 Locale makeBogusLocale() { 421 Locale bogus; 422 bogus.setToBogus(); 423 return bogus; 424 } 425 426 void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode) 427 { 428 if (U_FAILURE(errorCode)) { return; } 429 LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode)); 430 if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) { 431 // Error, or no extensions to copy. 432 return; 433 } 434 if (extensions_ == nullptr) { 435 extensions_ = Locale::getRoot().clone(); 436 if (extensions_ == nullptr) { 437 status_ = U_MEMORY_ALLOCATION_ERROR; 438 return; 439 } 440 } 441 _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode); 442 } 443 444 Locale LocaleBuilder::build(UErrorCode& errorCode) 445 { 446 if (U_FAILURE(errorCode)) { 447 return makeBogusLocale(); 448 } 449 if (U_FAILURE(status_)) { 450 errorCode = status_; 451 return makeBogusLocale(); 452 } 453 CharString locale_str(language_, errorCode); 454 if (uprv_strlen(script_) > 0) { 455 locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); 456 } 457 if (uprv_strlen(region_) > 0) { 458 locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); 459 } 460 if (variant_ != nullptr) { 461 locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); 462 } 463 if (U_FAILURE(errorCode)) { 464 return makeBogusLocale(); 465 } 466 Locale product(locale_str.data()); 467 if (extensions_ != nullptr) { 468 _copyExtensions(*extensions_, nullptr, product, true, errorCode); 469 } 470 if (U_FAILURE(errorCode)) { 471 return makeBogusLocale(); 472 } 473 return product; 474 } 475 476 UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const { 477 if (U_FAILURE(outErrorCode)) { 478 // Do not overwrite the older error code 479 return true; 480 } 481 outErrorCode = status_; 482 return U_FAILURE(outErrorCode); 483 } 484 485 U_NAMESPACE_END