SharedIntlData.cpp (29784B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* Runtime-wide Intl data shared across compartments. */ 8 9 #include "builtin/intl/SharedIntlData.h" 10 11 #include "mozilla/Assertions.h" 12 #include "mozilla/HashFunctions.h" 13 #include "mozilla/intl/Collator.h" 14 #include "mozilla/intl/DateTimeFormat.h" 15 #include "mozilla/intl/DateTimePatternGenerator.h" 16 #include "mozilla/intl/Locale.h" 17 #include "mozilla/intl/NumberFormat.h" 18 #include "mozilla/intl/TimeZone.h" 19 #include "mozilla/Span.h" 20 #include "mozilla/TextUtils.h" 21 22 #include <algorithm> 23 #include <stdint.h> 24 #include <string> 25 #include <string.h> 26 #include <string_view> 27 #include <utility> 28 29 #include "builtin/Array.h" 30 #include "builtin/intl/CommonFunctions.h" 31 #include "builtin/intl/FormatBuffer.h" 32 #include "builtin/intl/TimeZoneDataGenerated.h" 33 #include "js/StableStringChars.h" 34 #include "js/Utility.h" 35 #include "js/Vector.h" 36 #include "vm/ArrayObject.h" 37 #include "vm/JSAtomUtils.h" // Atomize 38 #include "vm/JSContext.h" 39 #include "vm/StringType.h" 40 41 using js::HashNumber; 42 43 template <typename Char> 44 static constexpr Char ToUpperASCII(Char c) { 45 return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c; 46 } 47 48 static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly"); 49 static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly"); 50 static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly"); 51 static_assert(ToUpperASCII(u'a') == u'A', 52 "verifying u'a' uppercases correctly"); 53 static_assert(ToUpperASCII(u'k') == u'K', 54 "verifying u'k' uppercases correctly"); 55 static_assert(ToUpperASCII(u'z') == u'Z', 56 "verifying u'z' uppercases correctly"); 57 58 template <typename Char> 59 static HashNumber HashStringIgnoreCaseASCII(const Char* s, size_t length) { 60 uint32_t hash = 0; 61 for (size_t i = 0; i < length; i++) { 62 hash = mozilla::AddToHash(hash, ToUpperASCII(s[i])); 63 } 64 return hash; 65 } 66 67 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup( 68 const JSLinearString* timeZone) 69 : js::intl::SharedIntlData::LinearStringLookup(timeZone) { 70 if (isLatin1) { 71 hash = HashStringIgnoreCaseASCII(latin1Chars, length); 72 } else { 73 hash = HashStringIgnoreCaseASCII(twoByteChars, length); 74 } 75 } 76 77 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup( 78 const char* chars, size_t length) 79 : js::intl::SharedIntlData::LinearStringLookup(chars, length) { 80 hash = HashStringIgnoreCaseASCII(latin1Chars, length); 81 } 82 83 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup( 84 const char16_t* chars, size_t length) 85 : js::intl::SharedIntlData::LinearStringLookup(chars, length) { 86 hash = HashStringIgnoreCaseASCII(twoByteChars, length); 87 } 88 89 template <typename Char1, typename Char2> 90 static bool EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2, 91 size_t len) { 92 for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) { 93 if (ToUpperASCII(*s1) != ToUpperASCII(*s2)) { 94 return false; 95 } 96 } 97 return true; 98 } 99 100 bool js::intl::SharedIntlData::AvailableTimeZoneHasher::match( 101 TimeZoneName key, const Lookup& lookup) { 102 if (key->length() != lookup.length) { 103 return false; 104 } 105 106 // Compare time zone names ignoring ASCII case differences. 107 if (key->hasLatin1Chars()) { 108 const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); 109 if (lookup.isLatin1) { 110 return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars, 111 lookup.length); 112 } 113 return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, 114 lookup.length); 115 } 116 117 const char16_t* keyChars = key->twoByteChars(lookup.nogc); 118 if (lookup.isLatin1) { 119 return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars, 120 lookup.length); 121 } 122 return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars, 123 lookup.length); 124 } 125 126 static bool IsLegacyICUTimeZone(mozilla::Span<const char> timeZone) { 127 std::string_view timeZoneView(timeZone.data(), timeZone.size()); 128 for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) { 129 if (timeZoneView == legacyTimeZone) { 130 return true; 131 } 132 } 133 return false; 134 } 135 136 bool js::intl::SharedIntlData::ensureTimeZones(JSContext* cx) { 137 if (timeZoneDataInitialized) { 138 return true; 139 } 140 141 // If ensureTimeZones() was called previously, but didn't complete due to 142 // OOM, clear all sets/maps and start from scratch. 143 availableTimeZones.clearAndCompact(); 144 145 auto timeZones = mozilla::intl::TimeZone::GetAvailableTimeZones(); 146 if (timeZones.isErr()) { 147 ReportInternalError(cx, timeZones.unwrapErr()); 148 return false; 149 } 150 151 for (auto timeZoneName : timeZones.unwrap()) { 152 if (timeZoneName.isErr()) { 153 ReportInternalError(cx); 154 return false; 155 } 156 auto timeZoneSpan = timeZoneName.unwrap(); 157 158 // Skip legacy ICU time zone names. 159 if (IsLegacyICUTimeZone(timeZoneSpan)) { 160 continue; 161 } 162 163 JSAtom* timeZone = Atomize(cx, timeZoneSpan.data(), timeZoneSpan.size()); 164 if (!timeZone) { 165 return false; 166 } 167 168 auto p = 169 availableTimeZones.lookupForAdd(AvailableTimeZoneSet::Lookup{timeZone}); 170 171 // ICU shouldn't report any duplicate time zone names, but if it does, 172 // just ignore the duplicate name. 173 if (!p && !availableTimeZones.add(p, timeZone)) { 174 ReportOutOfMemory(cx); 175 return false; 176 } 177 } 178 179 ianaZonesTreatedAsLinksByICU.clearAndCompact(); 180 181 for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) { 182 MOZ_ASSERT(rawTimeZone != nullptr); 183 JSAtom* timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone)); 184 if (!timeZone) { 185 return false; 186 } 187 188 auto p = ianaZonesTreatedAsLinksByICU.lookupForAdd( 189 TimeZoneSet::Lookup{timeZone}); 190 MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU"); 191 192 if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) { 193 ReportOutOfMemory(cx); 194 return false; 195 } 196 } 197 198 ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); 199 200 for (const auto& linkAndTarget : 201 timezone::ianaLinksCanonicalizedDifferentlyByICU) { 202 const char* rawLinkName = linkAndTarget.link; 203 const char* rawTarget = linkAndTarget.target; 204 205 MOZ_ASSERT(rawLinkName != nullptr); 206 JSAtom* linkName = Atomize(cx, rawLinkName, strlen(rawLinkName)); 207 if (!linkName) { 208 return false; 209 } 210 211 MOZ_ASSERT(rawTarget != nullptr); 212 JSAtom* target = Atomize(cx, rawTarget, strlen(rawTarget)); 213 if (!target) { 214 return false; 215 } 216 217 auto p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd( 218 TimeZoneMap::Lookup{linkName}); 219 MOZ_ASSERT( 220 !p, 221 "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU"); 222 223 if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) { 224 ReportOutOfMemory(cx); 225 return false; 226 } 227 } 228 229 MOZ_ASSERT(!timeZoneDataInitialized, 230 "ensureTimeZones is neither reentrant nor thread-safe"); 231 timeZoneDataInitialized = true; 232 233 return true; 234 } 235 236 JSLinearString* js::intl::SharedIntlData::canonicalizeTimeZone( 237 JSContext* cx, Handle<JSLinearString*> timeZone) { 238 if (!ensureTimeZones(cx)) { 239 return nullptr; 240 } 241 242 auto availablePtr = 243 availableTimeZones.lookup(AvailableTimeZoneSet::Lookup{timeZone}); 244 MOZ_ASSERT(availablePtr.found(), "Invalid time zone name"); 245 246 Rooted<JSAtom*> availableTimeZone(cx, *availablePtr); 247 return canonicalizeAvailableTimeZone(cx, availableTimeZone); 248 } 249 250 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone( 251 JSContext* cx, const AvailableTimeZoneSet::Lookup& lookup, 252 MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) { 253 MOZ_ASSERT(timeZoneDataInitialized); 254 255 auto availablePtr = availableTimeZones.lookup(lookup); 256 if (!availablePtr) { 257 return true; 258 } 259 260 Rooted<JSAtom*> availableTimeZone(cx, *availablePtr); 261 JSAtom* canonicalTimeZone = 262 canonicalizeAvailableTimeZone(cx, availableTimeZone); 263 if (!canonicalTimeZone) { 264 return false; 265 } 266 267 cx->markAtom(availableTimeZone); 268 MOZ_ASSERT(AtomIsMarked(cx->zone(), canonicalTimeZone), 269 "canonicalizeAvailableTimeZone already marked the atom"); 270 271 identifier.set(availableTimeZone); 272 primary.set(canonicalTimeZone); 273 return true; 274 } 275 276 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone( 277 JSContext* cx, Handle<JSLinearString*> timeZone, 278 MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) { 279 if (!ensureTimeZones(cx)) { 280 return false; 281 } 282 return validateAndCanonicalizeTimeZone( 283 cx, AvailableTimeZoneSet::Lookup{timeZone}, identifier, primary); 284 } 285 286 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone( 287 JSContext* cx, mozilla::Span<const char> timeZone, 288 MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) { 289 if (!ensureTimeZones(cx)) { 290 return false; 291 } 292 return validateAndCanonicalizeTimeZone( 293 cx, AvailableTimeZoneSet::Lookup{timeZone.data(), timeZone.size()}, 294 identifier, primary); 295 } 296 297 JSAtom* js::intl::SharedIntlData::canonicalizeAvailableTimeZone( 298 JSContext* cx, Handle<JSAtom*> availableTimeZone) { 299 MOZ_ASSERT(timeZoneDataInitialized); 300 MOZ_ASSERT( 301 availableTimeZones.has(AvailableTimeZoneSet::Lookup{availableTimeZone}), 302 "Invalid time zone name"); 303 304 // Some time zone names are canonicalized differently by ICU. 305 auto* canonicalTimeZone = 306 tryCanonicalizeTimeZoneConsistentWithIANA(availableTimeZone); 307 if (canonicalTimeZone) { 308 cx->markAtom(canonicalTimeZone); 309 return canonicalTimeZone; 310 } 311 312 JS::AutoStableStringChars stableChars(cx); 313 if (!stableChars.initTwoByte(cx, availableTimeZone)) { 314 return nullptr; 315 } 316 317 using TimeZone = mozilla::intl::TimeZone; 318 319 intl::FormatBuffer<char16_t, TimeZone::TimeZoneIdentifierLength> buffer(cx); 320 auto result = 321 TimeZone::GetCanonicalTimeZoneID(stableChars.twoByteRange(), buffer); 322 if (result.isErr()) { 323 intl::ReportInternalError(cx, result.unwrapErr()); 324 return nullptr; 325 } 326 MOZ_ASSERT(std::u16string_view(u"Etc/Unknown") != 327 std::u16string_view(buffer.data(), buffer.length()), 328 "Invalid canonical time zone"); 329 330 auto availablePtr = availableTimeZones.lookup( 331 AvailableTimeZoneSet::Lookup{buffer.data(), buffer.length()}); 332 MOZ_ASSERT(availablePtr, "Invalid time zone name"); 333 334 cx->markAtom(*availablePtr); 335 return *availablePtr; 336 } 337 338 JSAtom* js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA( 339 JSAtom* availableTimeZone) { 340 MOZ_ASSERT(timeZoneDataInitialized); 341 MOZ_ASSERT( 342 availableTimeZones.has(AvailableTimeZoneSet::Lookup{availableTimeZone}), 343 "Invalid time zone name"); 344 345 TimeZoneMap::Lookup lookup(availableTimeZone); 346 if (TimeZoneMap::Ptr p = 347 ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) { 348 // The effectively supported time zones aren't known at compile time, 349 // when 350 // 1. SpiderMonkey was compiled with "--with-system-icu". 351 // 2. ICU's dynamic time zone data loading feature was used. 352 // (ICU supports loading time zone files at runtime through the 353 // ICU_TIMEZONE_FILES_DIR environment variable.) 354 // Ensure ICU supports the new target zone before applying the update. 355 TimeZoneName targetTimeZone = p->value(); 356 if (availableTimeZones.has(AvailableTimeZoneSet::Lookup{targetTimeZone})) { 357 return targetTimeZone; 358 } 359 } else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) { 360 return *p; 361 } 362 return nullptr; 363 } 364 365 JS::Result<js::intl::SharedIntlData::AvailableTimeZoneSet::Iterator> 366 js::intl::SharedIntlData::availableTimeZonesIteration(JSContext* cx) { 367 if (!ensureTimeZones(cx)) { 368 return cx->alreadyReportedError(); 369 } 370 return availableTimeZones.iter(); 371 } 372 373 js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup( 374 const JSLinearString* locale) 375 : js::intl::SharedIntlData::LinearStringLookup(locale) { 376 if (isLatin1) { 377 hash = mozilla::HashString(latin1Chars, length); 378 } else { 379 hash = mozilla::HashString(twoByteChars, length); 380 } 381 } 382 383 js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(const char* chars, 384 size_t length) 385 : js::intl::SharedIntlData::LinearStringLookup(chars, length) { 386 hash = mozilla::HashString(latin1Chars, length); 387 } 388 389 bool js::intl::SharedIntlData::LocaleHasher::match(Locale key, 390 const Lookup& lookup) { 391 if (key->length() != lookup.length) { 392 return false; 393 } 394 395 if (key->hasLatin1Chars()) { 396 const Latin1Char* keyChars = key->latin1Chars(lookup.nogc); 397 if (lookup.isLatin1) { 398 return EqualChars(keyChars, lookup.latin1Chars, lookup.length); 399 } 400 return EqualChars(keyChars, lookup.twoByteChars, lookup.length); 401 } 402 403 const char16_t* keyChars = key->twoByteChars(lookup.nogc); 404 if (lookup.isLatin1) { 405 return EqualChars(lookup.latin1Chars, keyChars, lookup.length); 406 } 407 return EqualChars(keyChars, lookup.twoByteChars, lookup.length); 408 } 409 410 template <class AvailableLocales> 411 bool js::intl::SharedIntlData::getAvailableLocales( 412 JSContext* cx, LocaleSet& locales, 413 const AvailableLocales& availableLocales) { 414 auto addLocale = [cx, &locales](const char* locale, size_t length) { 415 JSAtom* atom = Atomize(cx, locale, length); 416 if (!atom) { 417 return false; 418 } 419 420 LocaleHasher::Lookup lookup(atom); 421 LocaleSet::AddPtr p = locales.lookupForAdd(lookup); 422 423 // ICU shouldn't report any duplicate locales, but if it does, just 424 // ignore the duplicated locale. 425 if (!p && !locales.add(p, atom)) { 426 ReportOutOfMemory(cx); 427 return false; 428 } 429 430 return true; 431 }; 432 433 js::Vector<char, 16> lang(cx); 434 435 for (const char* locale : availableLocales) { 436 size_t length = strlen(locale); 437 438 lang.clear(); 439 if (!lang.append(locale, length)) { 440 return false; 441 } 442 MOZ_ASSERT(lang.length() == length); 443 444 std::replace(lang.begin(), lang.end(), '_', '-'); 445 446 if (!addLocale(lang.begin(), length)) { 447 return false; 448 } 449 450 // From <https://tc39.es/ecma402/#sec-internal-slots>: 451 // 452 // For locales that include a script subtag in addition to language and 453 // region, the corresponding locale without a script subtag must also be 454 // supported; that is, if an implementation recognizes "zh-Hant-TW", it is 455 // also expected to recognize "zh-TW". 456 457 // 2 * Alpha language subtag 458 // + 1 separator 459 // + 4 * Alphanum script subtag 460 // + 1 separator 461 // + 2 * Alpha region subtag 462 using namespace mozilla::intl::LanguageTagLimits; 463 static constexpr size_t MinLanguageLength = 2; 464 static constexpr size_t MinLengthForScriptAndRegion = 465 MinLanguageLength + 1 + ScriptLength + 1 + AlphaRegionLength; 466 467 // Fast case: Skip locales without script subtags. 468 if (length < MinLengthForScriptAndRegion) { 469 continue; 470 } 471 472 // We don't need the full-fledged language tag parser when we just want to 473 // remove the script subtag. 474 475 // Find the separator between the language and script subtags. 476 const char* sep = std::char_traits<char>::find(lang.begin(), length, '-'); 477 if (!sep) { 478 continue; 479 } 480 481 // Possible |script| subtag start position. 482 const char* script = sep + 1; 483 484 // Find the separator between the script and region subtags. 485 sep = std::char_traits<char>::find(script, lang.end() - script, '-'); 486 if (!sep) { 487 continue; 488 } 489 490 // Continue with the next locale if we didn't find a script subtag. 491 size_t scriptLength = sep - script; 492 if (!mozilla::intl::IsStructurallyValidScriptTag<char>( 493 {script, scriptLength})) { 494 continue; 495 } 496 497 // Possible |region| subtag start position. 498 const char* region = sep + 1; 499 500 // Search if there's yet another subtag after the region subtag. 501 sep = std::char_traits<char>::find(region, lang.end() - region, '-'); 502 503 // Continue with the next locale if we didn't find a region subtag. 504 size_t regionLength = (sep ? sep : lang.end()) - region; 505 if (!mozilla::intl::IsStructurallyValidRegionTag<char>( 506 {region, regionLength})) { 507 continue; 508 } 509 510 // We've found a script and a region subtag. 511 512 static constexpr size_t ScriptWithSeparatorLength = ScriptLength + 1; 513 514 // Remove the script subtag. Note: erase() needs non-const pointers, which 515 // means we can't directly pass |script|. 516 char* p = const_cast<char*>(script); 517 lang.erase(p, p + ScriptWithSeparatorLength); 518 519 MOZ_ASSERT(lang.length() == length - ScriptWithSeparatorLength); 520 521 // Add the locale with the script subtag removed. 522 if (!addLocale(lang.begin(), lang.length())) { 523 return false; 524 } 525 } 526 527 // Forcibly add an entry for the last-ditch locale, in case ICU doesn't 528 // directly support it (but does support it through fallback, e.g. supporting 529 // "en-GB" indirectly using "en" support). 530 { 531 const char* lastDitch = intl::LastDitchLocale(); 532 MOZ_ASSERT(strcmp(lastDitch, "en-GB") == 0); 533 534 #ifdef DEBUG 535 static constexpr char lastDitchParent[] = "en"; 536 537 LocaleHasher::Lookup lookup(lastDitchParent, strlen(lastDitchParent)); 538 MOZ_ASSERT(locales.has(lookup), 539 "shouldn't be a need to add every locale implied by the " 540 "last-ditch locale, merely just the last-ditch locale"); 541 #endif 542 543 if (!addLocale(lastDitch, strlen(lastDitch))) { 544 return false; 545 } 546 } 547 548 return true; 549 } 550 551 #ifdef DEBUG 552 template <class AvailableLocales1, class AvailableLocales2> 553 static bool IsSameAvailableLocales(const AvailableLocales1& availableLocales1, 554 const AvailableLocales2& availableLocales2) { 555 return std::equal(std::begin(availableLocales1), std::end(availableLocales1), 556 std::begin(availableLocales2), std::end(availableLocales2), 557 [](const char* a, const char* b) { 558 // Intentionally comparing pointer equivalence. 559 return a == b; 560 }); 561 } 562 #endif 563 564 bool js::intl::SharedIntlData::ensureAvailableLocales(JSContext* cx) { 565 if (availableLocalesInitialized) { 566 return true; 567 } 568 569 // If ensureAvailableLocales() was called previously, but didn't complete due 570 // to OOM, clear all data and start from scratch. 571 availableLocales.clearAndCompact(); 572 collatorAvailableLocales.clearAndCompact(); 573 574 if (!getAvailableLocales(cx, availableLocales, 575 mozilla::intl::Locale::GetAvailableLocales())) { 576 return false; 577 } 578 if (!getAvailableLocales(cx, collatorAvailableLocales, 579 mozilla::intl::Collator::GetAvailableLocales())) { 580 return false; 581 } 582 583 MOZ_ASSERT(IsSameAvailableLocales( 584 mozilla::intl::Locale::GetAvailableLocales(), 585 mozilla::intl::DateTimeFormat::GetAvailableLocales())); 586 587 MOZ_ASSERT(IsSameAvailableLocales( 588 mozilla::intl::Locale::GetAvailableLocales(), 589 mozilla::intl::NumberFormat::GetAvailableLocales())); 590 591 MOZ_ASSERT(!availableLocalesInitialized, 592 "ensureAvailableLocales is neither reentrant nor thread-safe"); 593 availableLocalesInitialized = true; 594 595 return true; 596 } 597 598 bool js::intl::SharedIntlData::isAvailableLocale(JSContext* cx, 599 AvailableLocaleKind kind, 600 Handle<JSLinearString*> locale, 601 bool* available) { 602 if (!ensureAvailableLocales(cx)) { 603 return false; 604 } 605 606 LocaleHasher::Lookup lookup(locale); 607 608 switch (kind) { 609 case AvailableLocaleKind::Collator: 610 *available = collatorAvailableLocales.has(lookup); 611 return true; 612 case AvailableLocaleKind::DateTimeFormat: 613 case AvailableLocaleKind::DisplayNames: 614 case AvailableLocaleKind::DurationFormat: 615 case AvailableLocaleKind::ListFormat: 616 case AvailableLocaleKind::NumberFormat: 617 case AvailableLocaleKind::PluralRules: 618 case AvailableLocaleKind::RelativeTimeFormat: 619 case AvailableLocaleKind::Segmenter: 620 *available = availableLocales.has(lookup); 621 return true; 622 } 623 MOZ_CRASH("Invalid Intl constructor"); 624 } 625 626 js::ArrayObject* js::intl::SharedIntlData::availableLocalesOf( 627 JSContext* cx, AvailableLocaleKind kind) { 628 if (!ensureAvailableLocales(cx)) { 629 return nullptr; 630 } 631 632 LocaleSet* localeSet = nullptr; 633 switch (kind) { 634 case AvailableLocaleKind::Collator: 635 localeSet = &collatorAvailableLocales; 636 break; 637 case AvailableLocaleKind::DateTimeFormat: 638 case AvailableLocaleKind::DisplayNames: 639 case AvailableLocaleKind::DurationFormat: 640 case AvailableLocaleKind::ListFormat: 641 case AvailableLocaleKind::NumberFormat: 642 case AvailableLocaleKind::PluralRules: 643 case AvailableLocaleKind::RelativeTimeFormat: 644 case AvailableLocaleKind::Segmenter: 645 localeSet = &availableLocales; 646 break; 647 default: 648 MOZ_CRASH("Invalid Intl constructor"); 649 } 650 651 const uint32_t count = localeSet->count(); 652 ArrayObject* result = NewDenseFullyAllocatedArray(cx, count); 653 if (!result) { 654 return nullptr; 655 } 656 result->setDenseInitializedLength(count); 657 658 uint32_t index = 0; 659 for (auto range = localeSet->iter(); !range.done(); range.next()) { 660 JSAtom* locale = range.get(); 661 cx->markAtom(locale); 662 663 result->initDenseElement(index++, StringValue(locale)); 664 } 665 MOZ_ASSERT(index == count); 666 667 return result; 668 } 669 670 #if DEBUG || MOZ_SYSTEM_ICU 671 bool js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx) { 672 if (upperCaseFirstInitialized) { 673 return true; 674 } 675 676 // If ensureUpperCaseFirstLocales() was called previously, but didn't 677 // complete due to OOM, clear all data and start from scratch. 678 upperCaseFirstLocales.clearAndCompact(); 679 680 for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) { 681 auto collator = mozilla::intl::Collator::TryCreate(rawLocale); 682 if (collator.isErr()) { 683 ReportInternalError(cx, collator.unwrapErr()); 684 return false; 685 } 686 687 auto caseFirst = collator.unwrap()->GetCaseFirst(); 688 if (caseFirst.isErr()) { 689 ReportInternalError(cx, caseFirst.unwrapErr()); 690 return false; 691 } 692 693 if (caseFirst.unwrap() != mozilla::intl::Collator::CaseFirst::Upper) { 694 continue; 695 } 696 697 JSAtom* locale = Atomize(cx, rawLocale, strlen(rawLocale)); 698 if (!locale) { 699 return false; 700 } 701 702 LocaleHasher::Lookup lookup(locale); 703 LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup); 704 705 // ICU shouldn't report any duplicate locales, but if it does, just 706 // ignore the duplicated locale. 707 if (!p && !upperCaseFirstLocales.add(p, locale)) { 708 ReportOutOfMemory(cx); 709 return false; 710 } 711 } 712 713 MOZ_ASSERT( 714 !upperCaseFirstInitialized, 715 "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe"); 716 upperCaseFirstInitialized = true; 717 718 return true; 719 } 720 #endif // DEBUG || MOZ_SYSTEM_ICU 721 722 bool js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx, 723 Handle<JSLinearString*> locale, 724 bool* isUpperFirst) { 725 #if DEBUG || MOZ_SYSTEM_ICU 726 if (!ensureUpperCaseFirstLocales(cx)) { 727 return false; 728 } 729 #endif 730 731 #if !MOZ_SYSTEM_ICU 732 // "da" (Danish) and "mt" (Maltese) are the only two supported locales using 733 // upper-case first. CLDR also lists "cu" (Church Slavic) as an upper-case 734 // first locale, but since it's not supported in ICU, we don't care about it 735 // here. 736 bool isDefaultUpperCaseFirstLocale = js::StringEqualsLiteral(locale, "da") || 737 js::StringEqualsLiteral(locale, "mt"); 738 #endif 739 740 #if DEBUG || MOZ_SYSTEM_ICU 741 LocaleHasher::Lookup lookup(locale); 742 *isUpperFirst = upperCaseFirstLocales.has(lookup); 743 #else 744 *isUpperFirst = isDefaultUpperCaseFirstLocale; 745 #endif 746 747 #if !MOZ_SYSTEM_ICU 748 MOZ_ASSERT(*isUpperFirst == isDefaultUpperCaseFirstLocale, 749 "upper-case first locales don't match hard-coded list"); 750 #endif 751 752 return true; 753 } 754 755 #if DEBUG || MOZ_SYSTEM_ICU 756 bool js::intl::SharedIntlData::ensureIgnorePunctuationLocales(JSContext* cx) { 757 if (ignorePunctuationInitialized) { 758 return true; 759 } 760 761 // If ensureIgnorePunctuationLocales() was called previously, but didn't 762 // complete due to OOM, clear all data and start from scratch. 763 ignorePunctuationLocales.clearAndCompact(); 764 765 for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) { 766 auto collator = mozilla::intl::Collator::TryCreate(rawLocale); 767 if (collator.isErr()) { 768 ReportInternalError(cx, collator.unwrapErr()); 769 return false; 770 } 771 772 auto ignorePunctuation = collator.unwrap()->GetIgnorePunctuation(); 773 if (ignorePunctuation.isErr()) { 774 ReportInternalError(cx, ignorePunctuation.unwrapErr()); 775 return false; 776 } 777 778 if (!ignorePunctuation.unwrap()) { 779 continue; 780 } 781 782 JSAtom* locale = Atomize(cx, rawLocale, strlen(rawLocale)); 783 if (!locale) { 784 return false; 785 } 786 787 LocaleHasher::Lookup lookup(locale); 788 LocaleSet::AddPtr p = ignorePunctuationLocales.lookupForAdd(lookup); 789 790 // ICU shouldn't report any duplicate locales, but if it does, just 791 // ignore the duplicated locale. 792 if (!p && !ignorePunctuationLocales.add(p, locale)) { 793 ReportOutOfMemory(cx); 794 return false; 795 } 796 } 797 798 MOZ_ASSERT( 799 !ignorePunctuationInitialized, 800 "ensureIgnorePunctuationLocales is neither reentrant nor thread-safe"); 801 ignorePunctuationInitialized = true; 802 803 return true; 804 } 805 #endif // DEBUG || MOZ_SYSTEM_ICU 806 807 bool js::intl::SharedIntlData::isIgnorePunctuation( 808 JSContext* cx, Handle<JSLinearString*> locale, bool* ignorePunctuation) { 809 #if DEBUG || MOZ_SYSTEM_ICU 810 if (!ensureIgnorePunctuationLocales(cx)) { 811 return false; 812 } 813 #endif 814 815 #if !MOZ_SYSTEM_ICU 816 // "th" (Thai) is the only supported locale which ignores punctuation by 817 // default. 818 bool isDefaultIgnorePunctuationLocale = js::StringEqualsLiteral(locale, "th"); 819 #endif 820 821 #if DEBUG || MOZ_SYSTEM_ICU 822 LocaleHasher::Lookup lookup(locale); 823 *ignorePunctuation = ignorePunctuationLocales.has(lookup); 824 #else 825 *ignorePunctuation = isDefaultIgnorePunctuationLocale; 826 #endif 827 828 #if !MOZ_SYSTEM_ICU 829 MOZ_ASSERT(*ignorePunctuation == isDefaultIgnorePunctuationLocale, 830 "ignore punctuation locales don't match hard-coded list"); 831 #endif 832 833 return true; 834 } 835 836 void js::intl::DateTimePatternGeneratorDeleter::operator()( 837 mozilla::intl::DateTimePatternGenerator* ptr) { 838 delete ptr; 839 } 840 841 static bool StringsAreEqual(const char* s1, const char* s2) { 842 return !strcmp(s1, s2); 843 } 844 845 mozilla::intl::DateTimePatternGenerator* 846 js::intl::SharedIntlData::getDateTimePatternGenerator(JSContext* cx, 847 const char* locale) { 848 // Return the cached instance if the requested locale matches the locale 849 // of the cached generator. 850 if (dateTimePatternGeneratorLocale && 851 StringsAreEqual(dateTimePatternGeneratorLocale.get(), locale)) { 852 return dateTimePatternGenerator.get(); 853 } 854 855 auto result = mozilla::intl::DateTimePatternGenerator::TryCreate(locale); 856 if (result.isErr()) { 857 intl::ReportInternalError(cx, result.unwrapErr()); 858 return nullptr; 859 } 860 // The UniquePtr needs to be recreated as it's using a different Deleter in 861 // order to be able to forward declare DateTimePatternGenerator in 862 // SharedIntlData.h. 863 UniqueDateTimePatternGenerator gen(result.unwrap().release()); 864 865 JS::UniqueChars localeCopy = js::DuplicateString(cx, locale); 866 if (!localeCopy) { 867 return nullptr; 868 } 869 870 dateTimePatternGenerator = std::move(gen); 871 dateTimePatternGeneratorLocale = std::move(localeCopy); 872 873 return dateTimePatternGenerator.get(); 874 } 875 876 void js::intl::SharedIntlData::destroyInstance() { 877 availableTimeZones.clearAndCompact(); 878 ianaZonesTreatedAsLinksByICU.clearAndCompact(); 879 ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact(); 880 availableLocales.clearAndCompact(); 881 collatorAvailableLocales.clearAndCompact(); 882 #if DEBUG || MOZ_SYSTEM_ICU 883 upperCaseFirstLocales.clearAndCompact(); 884 ignorePunctuationLocales.clearAndCompact(); 885 #endif 886 } 887 888 void js::intl::SharedIntlData::trace(JSTracer* trc) { 889 // Atoms are always tenured. 890 if (!JS::RuntimeHeapIsMinorCollecting()) { 891 availableTimeZones.trace(trc); 892 ianaZonesTreatedAsLinksByICU.trace(trc); 893 ianaLinksCanonicalizedDifferentlyByICU.trace(trc); 894 availableLocales.trace(trc); 895 collatorAvailableLocales.trace(trc); 896 #if DEBUG || MOZ_SYSTEM_ICU 897 upperCaseFirstLocales.trace(trc); 898 ignorePunctuationLocales.trace(trc); 899 #endif 900 } 901 } 902 903 size_t js::intl::SharedIntlData::sizeOfExcludingThis( 904 mozilla::MallocSizeOf mallocSizeOf) const { 905 return availableTimeZones.shallowSizeOfExcludingThis(mallocSizeOf) + 906 ianaZonesTreatedAsLinksByICU.shallowSizeOfExcludingThis(mallocSizeOf) + 907 ianaLinksCanonicalizedDifferentlyByICU.shallowSizeOfExcludingThis( 908 mallocSizeOf) + 909 availableLocales.shallowSizeOfExcludingThis(mallocSizeOf) + 910 collatorAvailableLocales.shallowSizeOfExcludingThis(mallocSizeOf) + 911 #if DEBUG || MOZ_SYSTEM_ICU 912 upperCaseFirstLocales.shallowSizeOfExcludingThis(mallocSizeOf) + 913 ignorePunctuationLocales.shallowSizeOfExcludingThis(mallocSizeOf) + 914 #endif 915 mallocSizeOf(dateTimePatternGeneratorLocale.get()); 916 }