tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SharedIntlData.cpp (29784B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /* Runtime-wide Intl data shared across compartments. */
      8 
      9 #include "builtin/intl/SharedIntlData.h"
     10 
     11 #include "mozilla/Assertions.h"
     12 #include "mozilla/HashFunctions.h"
     13 #include "mozilla/intl/Collator.h"
     14 #include "mozilla/intl/DateTimeFormat.h"
     15 #include "mozilla/intl/DateTimePatternGenerator.h"
     16 #include "mozilla/intl/Locale.h"
     17 #include "mozilla/intl/NumberFormat.h"
     18 #include "mozilla/intl/TimeZone.h"
     19 #include "mozilla/Span.h"
     20 #include "mozilla/TextUtils.h"
     21 
     22 #include <algorithm>
     23 #include <stdint.h>
     24 #include <string>
     25 #include <string.h>
     26 #include <string_view>
     27 #include <utility>
     28 
     29 #include "builtin/Array.h"
     30 #include "builtin/intl/CommonFunctions.h"
     31 #include "builtin/intl/FormatBuffer.h"
     32 #include "builtin/intl/TimeZoneDataGenerated.h"
     33 #include "js/StableStringChars.h"
     34 #include "js/Utility.h"
     35 #include "js/Vector.h"
     36 #include "vm/ArrayObject.h"
     37 #include "vm/JSAtomUtils.h"  // Atomize
     38 #include "vm/JSContext.h"
     39 #include "vm/StringType.h"
     40 
     41 using js::HashNumber;
     42 
     43 template <typename Char>
     44 static constexpr Char ToUpperASCII(Char c) {
     45  return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
     46 }
     47 
     48 static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
     49 static_assert(ToUpperASCII('m') == 'M', "verifying 'm' uppercases correctly");
     50 static_assert(ToUpperASCII('z') == 'Z', "verifying 'z' uppercases correctly");
     51 static_assert(ToUpperASCII(u'a') == u'A',
     52              "verifying u'a' uppercases correctly");
     53 static_assert(ToUpperASCII(u'k') == u'K',
     54              "verifying u'k' uppercases correctly");
     55 static_assert(ToUpperASCII(u'z') == u'Z',
     56              "verifying u'z' uppercases correctly");
     57 
     58 template <typename Char>
     59 static HashNumber HashStringIgnoreCaseASCII(const Char* s, size_t length) {
     60  uint32_t hash = 0;
     61  for (size_t i = 0; i < length; i++) {
     62    hash = mozilla::AddToHash(hash, ToUpperASCII(s[i]));
     63  }
     64  return hash;
     65 }
     66 
     67 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup(
     68    const JSLinearString* timeZone)
     69    : js::intl::SharedIntlData::LinearStringLookup(timeZone) {
     70  if (isLatin1) {
     71    hash = HashStringIgnoreCaseASCII(latin1Chars, length);
     72  } else {
     73    hash = HashStringIgnoreCaseASCII(twoByteChars, length);
     74  }
     75 }
     76 
     77 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup(
     78    const char* chars, size_t length)
     79    : js::intl::SharedIntlData::LinearStringLookup(chars, length) {
     80  hash = HashStringIgnoreCaseASCII(latin1Chars, length);
     81 }
     82 
     83 js::intl::SharedIntlData::AvailableTimeZoneHasher::Lookup::Lookup(
     84    const char16_t* chars, size_t length)
     85    : js::intl::SharedIntlData::LinearStringLookup(chars, length) {
     86  hash = HashStringIgnoreCaseASCII(twoByteChars, length);
     87 }
     88 
     89 template <typename Char1, typename Char2>
     90 static bool EqualCharsIgnoreCaseASCII(const Char1* s1, const Char2* s2,
     91                                      size_t len) {
     92  for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) {
     93    if (ToUpperASCII(*s1) != ToUpperASCII(*s2)) {
     94      return false;
     95    }
     96  }
     97  return true;
     98 }
     99 
    100 bool js::intl::SharedIntlData::AvailableTimeZoneHasher::match(
    101    TimeZoneName key, const Lookup& lookup) {
    102  if (key->length() != lookup.length) {
    103    return false;
    104  }
    105 
    106  // Compare time zone names ignoring ASCII case differences.
    107  if (key->hasLatin1Chars()) {
    108    const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
    109    if (lookup.isLatin1) {
    110      return EqualCharsIgnoreCaseASCII(keyChars, lookup.latin1Chars,
    111                                       lookup.length);
    112    }
    113    return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars,
    114                                     lookup.length);
    115  }
    116 
    117  const char16_t* keyChars = key->twoByteChars(lookup.nogc);
    118  if (lookup.isLatin1) {
    119    return EqualCharsIgnoreCaseASCII(lookup.latin1Chars, keyChars,
    120                                     lookup.length);
    121  }
    122  return EqualCharsIgnoreCaseASCII(keyChars, lookup.twoByteChars,
    123                                   lookup.length);
    124 }
    125 
    126 static bool IsLegacyICUTimeZone(mozilla::Span<const char> timeZone) {
    127  std::string_view timeZoneView(timeZone.data(), timeZone.size());
    128  for (const auto& legacyTimeZone : js::timezone::legacyICUTimeZones) {
    129    if (timeZoneView == legacyTimeZone) {
    130      return true;
    131    }
    132  }
    133  return false;
    134 }
    135 
// Lazily populates the three time zone tables of this object:
//   - availableTimeZones: every name reported by
//     mozilla::intl::TimeZone::GetAvailableTimeZones(), except legacy ICU
//     names;
//   - ianaZonesTreatedAsLinksByICU: built from the generated
//     timezone::ianaZonesTreatedAsLinksByICU list;
//   - ianaLinksCanonicalizedDifferentlyByICU: link -> target pairs built from
//     the generated list of the same name.
//
// Returns true once the tables are populated (immediately if a previous call
// already succeeded). Returns false on failure; the tables are then rebuilt
// from scratch on the next call.
bool js::intl::SharedIntlData::ensureTimeZones(JSContext* cx) {
  if (timeZoneDataInitialized) {
    return true;
  }

  // If ensureTimeZones() was called previously, but didn't complete due to
  // OOM, clear all sets/maps and start from scratch.
  availableTimeZones.clearAndCompact();

  auto timeZones = mozilla::intl::TimeZone::GetAvailableTimeZones();
  if (timeZones.isErr()) {
    ReportInternalError(cx, timeZones.unwrapErr());
    return false;
  }

  // Each enumerated element is itself a result that has to be checked before
  // the name can be used.
  for (auto timeZoneName : timeZones.unwrap()) {
    if (timeZoneName.isErr()) {
      ReportInternalError(cx);
      return false;
    }
    auto timeZoneSpan = timeZoneName.unwrap();

    // Skip legacy ICU time zone names.
    if (IsLegacyICUTimeZone(timeZoneSpan)) {
      continue;
    }

    JSAtom* timeZone = Atomize(cx, timeZoneSpan.data(), timeZoneSpan.size());
    if (!timeZone) {
      return false;
    }

    auto p =
        availableTimeZones.lookupForAdd(AvailableTimeZoneSet::Lookup{timeZone});

    // ICU shouldn't report any duplicate time zone names, but if it does,
    // just ignore the duplicate name.
    if (!p && !availableTimeZones.add(p, timeZone)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  // Second table: names from the generated ianaZonesTreatedAsLinksByICU list.
  ianaZonesTreatedAsLinksByICU.clearAndCompact();

  for (const char* rawTimeZone : timezone::ianaZonesTreatedAsLinksByICU) {
    MOZ_ASSERT(rawTimeZone != nullptr);
    JSAtom* timeZone = Atomize(cx, rawTimeZone, strlen(rawTimeZone));
    if (!timeZone) {
      return false;
    }

    // The generated list is expected to be duplicate-free, so the slot must be
    // vacant.
    auto p = ianaZonesTreatedAsLinksByICU.lookupForAdd(
        TimeZoneSet::Lookup{timeZone});
    MOZ_ASSERT(!p, "Duplicate entry in timezone::ianaZonesTreatedAsLinksByICU");

    if (!ianaZonesTreatedAsLinksByICU.add(p, timeZone)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  // Third table: link name -> target name pairs from the generated
  // ianaLinksCanonicalizedDifferentlyByICU list.
  ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact();

  for (const auto& linkAndTarget :
       timezone::ianaLinksCanonicalizedDifferentlyByICU) {
    const char* rawLinkName = linkAndTarget.link;
    const char* rawTarget = linkAndTarget.target;

    MOZ_ASSERT(rawLinkName != nullptr);
    JSAtom* linkName = Atomize(cx, rawLinkName, strlen(rawLinkName));
    if (!linkName) {
      return false;
    }

    MOZ_ASSERT(rawTarget != nullptr);
    JSAtom* target = Atomize(cx, rawTarget, strlen(rawTarget));
    if (!target) {
      return false;
    }

    auto p = ianaLinksCanonicalizedDifferentlyByICU.lookupForAdd(
        TimeZoneMap::Lookup{linkName});
    MOZ_ASSERT(
        !p,
        "Duplicate entry in timezone::ianaLinksCanonicalizedDifferentlyByICU");

    if (!ianaLinksCanonicalizedDifferentlyByICU.add(p, linkName, target)) {
      ReportOutOfMemory(cx);
      return false;
    }
  }

  // Only mark the data as initialized after every table was filled, so a
  // failed attempt is retried from scratch.
  MOZ_ASSERT(!timeZoneDataInitialized,
             "ensureTimeZones is neither reentrant nor thread-safe");
  timeZoneDataInitialized = true;

  return true;
}
    235 
    236 JSLinearString* js::intl::SharedIntlData::canonicalizeTimeZone(
    237    JSContext* cx, Handle<JSLinearString*> timeZone) {
    238  if (!ensureTimeZones(cx)) {
    239    return nullptr;
    240  }
    241 
    242  auto availablePtr =
    243      availableTimeZones.lookup(AvailableTimeZoneSet::Lookup{timeZone});
    244  MOZ_ASSERT(availablePtr.found(), "Invalid time zone name");
    245 
    246  Rooted<JSAtom*> availableTimeZone(cx, *availablePtr);
    247  return canonicalizeAvailableTimeZone(cx, availableTimeZone);
    248 }
    249 
    250 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone(
    251    JSContext* cx, const AvailableTimeZoneSet::Lookup& lookup,
    252    MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) {
    253  MOZ_ASSERT(timeZoneDataInitialized);
    254 
    255  auto availablePtr = availableTimeZones.lookup(lookup);
    256  if (!availablePtr) {
    257    return true;
    258  }
    259 
    260  Rooted<JSAtom*> availableTimeZone(cx, *availablePtr);
    261  JSAtom* canonicalTimeZone =
    262      canonicalizeAvailableTimeZone(cx, availableTimeZone);
    263  if (!canonicalTimeZone) {
    264    return false;
    265  }
    266 
    267  cx->markAtom(availableTimeZone);
    268  MOZ_ASSERT(AtomIsMarked(cx->zone(), canonicalTimeZone),
    269             "canonicalizeAvailableTimeZone already marked the atom");
    270 
    271  identifier.set(availableTimeZone);
    272  primary.set(canonicalTimeZone);
    273  return true;
    274 }
    275 
    276 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone(
    277    JSContext* cx, Handle<JSLinearString*> timeZone,
    278    MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) {
    279  if (!ensureTimeZones(cx)) {
    280    return false;
    281  }
    282  return validateAndCanonicalizeTimeZone(
    283      cx, AvailableTimeZoneSet::Lookup{timeZone}, identifier, primary);
    284 }
    285 
    286 bool js::intl::SharedIntlData::validateAndCanonicalizeTimeZone(
    287    JSContext* cx, mozilla::Span<const char> timeZone,
    288    MutableHandle<JSAtom*> identifier, MutableHandle<JSAtom*> primary) {
    289  if (!ensureTimeZones(cx)) {
    290    return false;
    291  }
    292  return validateAndCanonicalizeTimeZone(
    293      cx, AvailableTimeZoneSet::Lookup{timeZone.data(), timeZone.size()},
    294      identifier, primary);
    295 }
    296 
    297 JSAtom* js::intl::SharedIntlData::canonicalizeAvailableTimeZone(
    298    JSContext* cx, Handle<JSAtom*> availableTimeZone) {
    299  MOZ_ASSERT(timeZoneDataInitialized);
    300  MOZ_ASSERT(
    301      availableTimeZones.has(AvailableTimeZoneSet::Lookup{availableTimeZone}),
    302      "Invalid time zone name");
    303 
    304  // Some time zone names are canonicalized differently by ICU.
    305  auto* canonicalTimeZone =
    306      tryCanonicalizeTimeZoneConsistentWithIANA(availableTimeZone);
    307  if (canonicalTimeZone) {
    308    cx->markAtom(canonicalTimeZone);
    309    return canonicalTimeZone;
    310  }
    311 
    312  JS::AutoStableStringChars stableChars(cx);
    313  if (!stableChars.initTwoByte(cx, availableTimeZone)) {
    314    return nullptr;
    315  }
    316 
    317  using TimeZone = mozilla::intl::TimeZone;
    318 
    319  intl::FormatBuffer<char16_t, TimeZone::TimeZoneIdentifierLength> buffer(cx);
    320  auto result =
    321      TimeZone::GetCanonicalTimeZoneID(stableChars.twoByteRange(), buffer);
    322  if (result.isErr()) {
    323    intl::ReportInternalError(cx, result.unwrapErr());
    324    return nullptr;
    325  }
    326  MOZ_ASSERT(std::u16string_view(u"Etc/Unknown") !=
    327                 std::u16string_view(buffer.data(), buffer.length()),
    328             "Invalid canonical time zone");
    329 
    330  auto availablePtr = availableTimeZones.lookup(
    331      AvailableTimeZoneSet::Lookup{buffer.data(), buffer.length()});
    332  MOZ_ASSERT(availablePtr, "Invalid time zone name");
    333 
    334  cx->markAtom(*availablePtr);
    335  return *availablePtr;
    336 }
    337 
    338 JSAtom* js::intl::SharedIntlData::tryCanonicalizeTimeZoneConsistentWithIANA(
    339    JSAtom* availableTimeZone) {
    340  MOZ_ASSERT(timeZoneDataInitialized);
    341  MOZ_ASSERT(
    342      availableTimeZones.has(AvailableTimeZoneSet::Lookup{availableTimeZone}),
    343      "Invalid time zone name");
    344 
    345  TimeZoneMap::Lookup lookup(availableTimeZone);
    346  if (TimeZoneMap::Ptr p =
    347          ianaLinksCanonicalizedDifferentlyByICU.lookup(lookup)) {
    348    // The effectively supported time zones aren't known at compile time,
    349    // when
    350    // 1. SpiderMonkey was compiled with "--with-system-icu".
    351    // 2. ICU's dynamic time zone data loading feature was used.
    352    //    (ICU supports loading time zone files at runtime through the
    353    //    ICU_TIMEZONE_FILES_DIR environment variable.)
    354    // Ensure ICU supports the new target zone before applying the update.
    355    TimeZoneName targetTimeZone = p->value();
    356    if (availableTimeZones.has(AvailableTimeZoneSet::Lookup{targetTimeZone})) {
    357      return targetTimeZone;
    358    }
    359  } else if (TimeZoneSet::Ptr p = ianaZonesTreatedAsLinksByICU.lookup(lookup)) {
    360    return *p;
    361  }
    362  return nullptr;
    363 }
    364 
    365 JS::Result<js::intl::SharedIntlData::AvailableTimeZoneSet::Iterator>
    366 js::intl::SharedIntlData::availableTimeZonesIteration(JSContext* cx) {
    367  if (!ensureTimeZones(cx)) {
    368    return cx->alreadyReportedError();
    369  }
    370  return availableTimeZones.iter();
    371 }
    372 
    373 js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(
    374    const JSLinearString* locale)
    375    : js::intl::SharedIntlData::LinearStringLookup(locale) {
    376  if (isLatin1) {
    377    hash = mozilla::HashString(latin1Chars, length);
    378  } else {
    379    hash = mozilla::HashString(twoByteChars, length);
    380  }
    381 }
    382 
    383 js::intl::SharedIntlData::LocaleHasher::Lookup::Lookup(const char* chars,
    384                                                       size_t length)
    385    : js::intl::SharedIntlData::LinearStringLookup(chars, length) {
    386  hash = mozilla::HashString(latin1Chars, length);
    387 }
    388 
    389 bool js::intl::SharedIntlData::LocaleHasher::match(Locale key,
    390                                                   const Lookup& lookup) {
    391  if (key->length() != lookup.length) {
    392    return false;
    393  }
    394 
    395  if (key->hasLatin1Chars()) {
    396    const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
    397    if (lookup.isLatin1) {
    398      return EqualChars(keyChars, lookup.latin1Chars, lookup.length);
    399    }
    400    return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
    401  }
    402 
    403  const char16_t* keyChars = key->twoByteChars(lookup.nogc);
    404  if (lookup.isLatin1) {
    405    return EqualChars(lookup.latin1Chars, keyChars, lookup.length);
    406  }
    407  return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
    408 }
    409 
// Fills |locales| with one atom per entry of |availableLocales|, after
// replacing every '_' in the entry with '-'.
//
// For entries shaped like language-script-region (optionally followed by more
// subtags), the same tag with the script subtag removed is added as well.
// Finally the last-ditch locale is added unconditionally.
//
// Returns false when atomizing, growing the scratch buffer, or inserting into
// |locales| fails; |locales| may be partially filled in that case.
template <class AvailableLocales>
bool js::intl::SharedIntlData::getAvailableLocales(
    JSContext* cx, LocaleSet& locales,
    const AvailableLocales& availableLocales) {
  // Atomizes |locale| and inserts it into |locales| unless already present.
  auto addLocale = [cx, &locales](const char* locale, size_t length) {
    JSAtom* atom = Atomize(cx, locale, length);
    if (!atom) {
      return false;
    }

    LocaleHasher::Lookup lookup(atom);
    LocaleSet::AddPtr p = locales.lookupForAdd(lookup);

    // ICU shouldn't report any duplicate locales, but if it does, just
    // ignore the duplicated locale.
    if (!p && !locales.add(p, atom)) {
      ReportOutOfMemory(cx);
      return false;
    }

    return true;
  };

  // Scratch buffer holding the current locale; reused across iterations.
  js::Vector<char, 16> lang(cx);

  for (const char* locale : availableLocales) {
    size_t length = strlen(locale);

    lang.clear();
    if (!lang.append(locale, length)) {
      return false;
    }
    MOZ_ASSERT(lang.length() == length);

    // Switch from '_' separators to '-' separators.
    std::replace(lang.begin(), lang.end(), '_', '-');

    if (!addLocale(lang.begin(), length)) {
      return false;
    }

    // From <https://tc39.es/ecma402/#sec-internal-slots>:
    //
    // For locales that include a script subtag in addition to language and
    // region, the corresponding locale without a script subtag must also be
    // supported; that is, if an implementation recognizes "zh-Hant-TW", it is
    // also expected to recognize "zh-TW".

    //   2 * Alpha language subtag
    // + 1 separator
    // + 4 * Alphanum script subtag
    // + 1 separator
    // + 2 * Alpha region subtag
    using namespace mozilla::intl::LanguageTagLimits;
    static constexpr size_t MinLanguageLength = 2;
    static constexpr size_t MinLengthForScriptAndRegion =
        MinLanguageLength + 1 + ScriptLength + 1 + AlphaRegionLength;

    // Fast case: Skip locales without script subtags.
    if (length < MinLengthForScriptAndRegion) {
      continue;
    }

    // We don't need the full-fledged language tag parser when we just want to
    // remove the script subtag.

    // Find the separator between the language and script subtags.
    const char* sep = std::char_traits<char>::find(lang.begin(), length, '-');
    if (!sep) {
      continue;
    }

    // Possible |script| subtag start position.
    const char* script = sep + 1;

    // Find the separator between the script and region subtags.
    sep = std::char_traits<char>::find(script, lang.end() - script, '-');
    if (!sep) {
      continue;
    }

    // Continue with the next locale if we didn't find a script subtag.
    size_t scriptLength = sep - script;
    if (!mozilla::intl::IsStructurallyValidScriptTag<char>(
            {script, scriptLength})) {
      continue;
    }

    // Possible |region| subtag start position.
    const char* region = sep + 1;

    // Search if there's yet another subtag after the region subtag.
    sep = std::char_traits<char>::find(region, lang.end() - region, '-');

    // Continue with the next locale if we didn't find a region subtag.
    // When no further separator exists, the region runs to the end of |lang|.
    size_t regionLength = (sep ? sep : lang.end()) - region;
    if (!mozilla::intl::IsStructurallyValidRegionTag<char>(
            {region, regionLength})) {
      continue;
    }

    // We've found a script and a region subtag.

    static constexpr size_t ScriptWithSeparatorLength = ScriptLength + 1;

    // Remove the script subtag. Note: erase() needs non-const pointers, which
    // means we can't directly pass |script|.
    // The erased range covers the script subtag plus the '-' that follows it.
    char* p = const_cast<char*>(script);
    lang.erase(p, p + ScriptWithSeparatorLength);

    MOZ_ASSERT(lang.length() == length - ScriptWithSeparatorLength);

    // Add the locale with the script subtag removed.
    if (!addLocale(lang.begin(), lang.length())) {
      return false;
    }
  }

  // Forcibly add an entry for the last-ditch locale, in case ICU doesn't
  // directly support it (but does support it through fallback, e.g. supporting
  // "en-GB" indirectly using "en" support).
  {
    const char* lastDitch = intl::LastDitchLocale();
    MOZ_ASSERT(strcmp(lastDitch, "en-GB") == 0);

#ifdef DEBUG
    // Debug-only check that the parent language of the last-ditch locale was
    // already added by the loop above.
    static constexpr char lastDitchParent[] = "en";

    LocaleHasher::Lookup lookup(lastDitchParent, strlen(lastDitchParent));
    MOZ_ASSERT(locales.has(lookup),
               "shouldn't be a need to add every locale implied by the "
               "last-ditch locale, merely just the last-ditch locale");
#endif

    if (!addLocale(lastDitch, strlen(lastDitch))) {
      return false;
    }
  }

  return true;
}
    550 
    551 #ifdef DEBUG
    552 template <class AvailableLocales1, class AvailableLocales2>
    553 static bool IsSameAvailableLocales(const AvailableLocales1& availableLocales1,
    554                                   const AvailableLocales2& availableLocales2) {
    555  return std::equal(std::begin(availableLocales1), std::end(availableLocales1),
    556                    std::begin(availableLocales2), std::end(availableLocales2),
    557                    [](const char* a, const char* b) {
    558                      // Intentionally comparing pointer equivalence.
    559                      return a == b;
    560                    });
    561 }
    562 #endif
    563 
    564 bool js::intl::SharedIntlData::ensureAvailableLocales(JSContext* cx) {
    565  if (availableLocalesInitialized) {
    566    return true;
    567  }
    568 
    569  // If ensureAvailableLocales() was called previously, but didn't complete due
    570  // to OOM, clear all data and start from scratch.
    571  availableLocales.clearAndCompact();
    572  collatorAvailableLocales.clearAndCompact();
    573 
    574  if (!getAvailableLocales(cx, availableLocales,
    575                           mozilla::intl::Locale::GetAvailableLocales())) {
    576    return false;
    577  }
    578  if (!getAvailableLocales(cx, collatorAvailableLocales,
    579                           mozilla::intl::Collator::GetAvailableLocales())) {
    580    return false;
    581  }
    582 
    583  MOZ_ASSERT(IsSameAvailableLocales(
    584      mozilla::intl::Locale::GetAvailableLocales(),
    585      mozilla::intl::DateTimeFormat::GetAvailableLocales()));
    586 
    587  MOZ_ASSERT(IsSameAvailableLocales(
    588      mozilla::intl::Locale::GetAvailableLocales(),
    589      mozilla::intl::NumberFormat::GetAvailableLocales()));
    590 
    591  MOZ_ASSERT(!availableLocalesInitialized,
    592             "ensureAvailableLocales is neither reentrant nor thread-safe");
    593  availableLocalesInitialized = true;
    594 
    595  return true;
    596 }
    597 
    598 bool js::intl::SharedIntlData::isAvailableLocale(JSContext* cx,
    599                                                 AvailableLocaleKind kind,
    600                                                 Handle<JSLinearString*> locale,
    601                                                 bool* available) {
    602  if (!ensureAvailableLocales(cx)) {
    603    return false;
    604  }
    605 
    606  LocaleHasher::Lookup lookup(locale);
    607 
    608  switch (kind) {
    609    case AvailableLocaleKind::Collator:
    610      *available = collatorAvailableLocales.has(lookup);
    611      return true;
    612    case AvailableLocaleKind::DateTimeFormat:
    613    case AvailableLocaleKind::DisplayNames:
    614    case AvailableLocaleKind::DurationFormat:
    615    case AvailableLocaleKind::ListFormat:
    616    case AvailableLocaleKind::NumberFormat:
    617    case AvailableLocaleKind::PluralRules:
    618    case AvailableLocaleKind::RelativeTimeFormat:
    619    case AvailableLocaleKind::Segmenter:
    620      *available = availableLocales.has(lookup);
    621      return true;
    622  }
    623  MOZ_CRASH("Invalid Intl constructor");
    624 }
    625 
    626 js::ArrayObject* js::intl::SharedIntlData::availableLocalesOf(
    627    JSContext* cx, AvailableLocaleKind kind) {
    628  if (!ensureAvailableLocales(cx)) {
    629    return nullptr;
    630  }
    631 
    632  LocaleSet* localeSet = nullptr;
    633  switch (kind) {
    634    case AvailableLocaleKind::Collator:
    635      localeSet = &collatorAvailableLocales;
    636      break;
    637    case AvailableLocaleKind::DateTimeFormat:
    638    case AvailableLocaleKind::DisplayNames:
    639    case AvailableLocaleKind::DurationFormat:
    640    case AvailableLocaleKind::ListFormat:
    641    case AvailableLocaleKind::NumberFormat:
    642    case AvailableLocaleKind::PluralRules:
    643    case AvailableLocaleKind::RelativeTimeFormat:
    644    case AvailableLocaleKind::Segmenter:
    645      localeSet = &availableLocales;
    646      break;
    647    default:
    648      MOZ_CRASH("Invalid Intl constructor");
    649  }
    650 
    651  const uint32_t count = localeSet->count();
    652  ArrayObject* result = NewDenseFullyAllocatedArray(cx, count);
    653  if (!result) {
    654    return nullptr;
    655  }
    656  result->setDenseInitializedLength(count);
    657 
    658  uint32_t index = 0;
    659  for (auto range = localeSet->iter(); !range.done(); range.next()) {
    660    JSAtom* locale = range.get();
    661    cx->markAtom(locale);
    662 
    663    result->initDenseElement(index++, StringValue(locale));
    664  }
    665  MOZ_ASSERT(index == count);
    666 
    667  return result;
    668 }
    669 
    670 #if DEBUG || MOZ_SYSTEM_ICU
    671 bool js::intl::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx) {
    672  if (upperCaseFirstInitialized) {
    673    return true;
    674  }
    675 
    676  // If ensureUpperCaseFirstLocales() was called previously, but didn't
    677  // complete due to OOM, clear all data and start from scratch.
    678  upperCaseFirstLocales.clearAndCompact();
    679 
    680  for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) {
    681    auto collator = mozilla::intl::Collator::TryCreate(rawLocale);
    682    if (collator.isErr()) {
    683      ReportInternalError(cx, collator.unwrapErr());
    684      return false;
    685    }
    686 
    687    auto caseFirst = collator.unwrap()->GetCaseFirst();
    688    if (caseFirst.isErr()) {
    689      ReportInternalError(cx, caseFirst.unwrapErr());
    690      return false;
    691    }
    692 
    693    if (caseFirst.unwrap() != mozilla::intl::Collator::CaseFirst::Upper) {
    694      continue;
    695    }
    696 
    697    JSAtom* locale = Atomize(cx, rawLocale, strlen(rawLocale));
    698    if (!locale) {
    699      return false;
    700    }
    701 
    702    LocaleHasher::Lookup lookup(locale);
    703    LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);
    704 
    705    // ICU shouldn't report any duplicate locales, but if it does, just
    706    // ignore the duplicated locale.
    707    if (!p && !upperCaseFirstLocales.add(p, locale)) {
    708      ReportOutOfMemory(cx);
    709      return false;
    710    }
    711  }
    712 
    713  MOZ_ASSERT(
    714      !upperCaseFirstInitialized,
    715      "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
    716  upperCaseFirstInitialized = true;
    717 
    718  return true;
    719 }
    720 #endif  // DEBUG || MOZ_SYSTEM_ICU
    721 
    722 bool js::intl::SharedIntlData::isUpperCaseFirst(JSContext* cx,
    723                                                Handle<JSLinearString*> locale,
    724                                                bool* isUpperFirst) {
    725 #if DEBUG || MOZ_SYSTEM_ICU
    726  if (!ensureUpperCaseFirstLocales(cx)) {
    727    return false;
    728  }
    729 #endif
    730 
    731 #if !MOZ_SYSTEM_ICU
    732  // "da" (Danish) and "mt" (Maltese) are the only two supported locales using
    733  // upper-case first. CLDR also lists "cu" (Church Slavic) as an upper-case
    734  // first locale, but since it's not supported in ICU, we don't care about it
    735  // here.
    736  bool isDefaultUpperCaseFirstLocale = js::StringEqualsLiteral(locale, "da") ||
    737                                       js::StringEqualsLiteral(locale, "mt");
    738 #endif
    739 
    740 #if DEBUG || MOZ_SYSTEM_ICU
    741  LocaleHasher::Lookup lookup(locale);
    742  *isUpperFirst = upperCaseFirstLocales.has(lookup);
    743 #else
    744  *isUpperFirst = isDefaultUpperCaseFirstLocale;
    745 #endif
    746 
    747 #if !MOZ_SYSTEM_ICU
    748  MOZ_ASSERT(*isUpperFirst == isDefaultUpperCaseFirstLocale,
    749             "upper-case first locales don't match hard-coded list");
    750 #endif
    751 
    752  return true;
    753 }
    754 
    755 #if DEBUG || MOZ_SYSTEM_ICU
    756 bool js::intl::SharedIntlData::ensureIgnorePunctuationLocales(JSContext* cx) {
    757  if (ignorePunctuationInitialized) {
    758    return true;
    759  }
    760 
    761  // If ensureIgnorePunctuationLocales() was called previously, but didn't
    762  // complete due to OOM, clear all data and start from scratch.
    763  ignorePunctuationLocales.clearAndCompact();
    764 
    765  for (const char* rawLocale : mozilla::intl::Collator::GetAvailableLocales()) {
    766    auto collator = mozilla::intl::Collator::TryCreate(rawLocale);
    767    if (collator.isErr()) {
    768      ReportInternalError(cx, collator.unwrapErr());
    769      return false;
    770    }
    771 
    772    auto ignorePunctuation = collator.unwrap()->GetIgnorePunctuation();
    773    if (ignorePunctuation.isErr()) {
    774      ReportInternalError(cx, ignorePunctuation.unwrapErr());
    775      return false;
    776    }
    777 
    778    if (!ignorePunctuation.unwrap()) {
    779      continue;
    780    }
    781 
    782    JSAtom* locale = Atomize(cx, rawLocale, strlen(rawLocale));
    783    if (!locale) {
    784      return false;
    785    }
    786 
    787    LocaleHasher::Lookup lookup(locale);
    788    LocaleSet::AddPtr p = ignorePunctuationLocales.lookupForAdd(lookup);
    789 
    790    // ICU shouldn't report any duplicate locales, but if it does, just
    791    // ignore the duplicated locale.
    792    if (!p && !ignorePunctuationLocales.add(p, locale)) {
    793      ReportOutOfMemory(cx);
    794      return false;
    795    }
    796  }
    797 
    798  MOZ_ASSERT(
    799      !ignorePunctuationInitialized,
    800      "ensureIgnorePunctuationLocales is neither reentrant nor thread-safe");
    801  ignorePunctuationInitialized = true;
    802 
    803  return true;
    804 }
    805 #endif  // DEBUG || MOZ_SYSTEM_ICU
    806 
    807 bool js::intl::SharedIntlData::isIgnorePunctuation(
    808    JSContext* cx, Handle<JSLinearString*> locale, bool* ignorePunctuation) {
    809 #if DEBUG || MOZ_SYSTEM_ICU
    810  if (!ensureIgnorePunctuationLocales(cx)) {
    811    return false;
    812  }
    813 #endif
    814 
    815 #if !MOZ_SYSTEM_ICU
    816  // "th" (Thai) is the only supported locale which ignores punctuation by
    817  // default.
    818  bool isDefaultIgnorePunctuationLocale = js::StringEqualsLiteral(locale, "th");
    819 #endif
    820 
    821 #if DEBUG || MOZ_SYSTEM_ICU
    822  LocaleHasher::Lookup lookup(locale);
    823  *ignorePunctuation = ignorePunctuationLocales.has(lookup);
    824 #else
    825  *ignorePunctuation = isDefaultIgnorePunctuationLocale;
    826 #endif
    827 
    828 #if !MOZ_SYSTEM_ICU
    829  MOZ_ASSERT(*ignorePunctuation == isDefaultIgnorePunctuationLocale,
    830             "ignore punctuation locales don't match hard-coded list");
    831 #endif
    832 
    833  return true;
    834 }
    835 
    836 void js::intl::DateTimePatternGeneratorDeleter::operator()(
    837    mozilla::intl::DateTimePatternGenerator* ptr) {
    838  delete ptr;
    839 }
    840 
    // Returns true iff the NUL-terminated strings |s1| and |s2| have identical
    // contents. Neither argument may be null.
    static bool StringsAreEqual(const char* s1, const char* s2) {
      return strcmp(s1, s2) == 0;
    }
    844 
    845 mozilla::intl::DateTimePatternGenerator*
    846 js::intl::SharedIntlData::getDateTimePatternGenerator(JSContext* cx,
    847                                                      const char* locale) {
    848  // Return the cached instance if the requested locale matches the locale
    849  // of the cached generator.
    850  if (dateTimePatternGeneratorLocale &&
    851      StringsAreEqual(dateTimePatternGeneratorLocale.get(), locale)) {
    852    return dateTimePatternGenerator.get();
    853  }
    854 
    855  auto result = mozilla::intl::DateTimePatternGenerator::TryCreate(locale);
    856  if (result.isErr()) {
    857    intl::ReportInternalError(cx, result.unwrapErr());
    858    return nullptr;
    859  }
    860  // The UniquePtr needs to be recreated as it's using a different Deleter in
    861  // order to be able to forward declare DateTimePatternGenerator in
    862  // SharedIntlData.h.
    863  UniqueDateTimePatternGenerator gen(result.unwrap().release());
    864 
    865  JS::UniqueChars localeCopy = js::DuplicateString(cx, locale);
    866  if (!localeCopy) {
    867    return nullptr;
    868  }
    869 
    870  dateTimePatternGenerator = std::move(gen);
    871  dateTimePatternGeneratorLocale = std::move(localeCopy);
    872 
    873  return dateTimePatternGenerator.get();
    874 }
    875 
    876 void js::intl::SharedIntlData::destroyInstance() {
    877  availableTimeZones.clearAndCompact();
    878  ianaZonesTreatedAsLinksByICU.clearAndCompact();
    879  ianaLinksCanonicalizedDifferentlyByICU.clearAndCompact();
    880  availableLocales.clearAndCompact();
    881  collatorAvailableLocales.clearAndCompact();
    882 #if DEBUG || MOZ_SYSTEM_ICU
    883  upperCaseFirstLocales.clearAndCompact();
    884  ignorePunctuationLocales.clearAndCompact();
    885 #endif
    886 }
    887 
    888 void js::intl::SharedIntlData::trace(JSTracer* trc) {
    889  // Atoms are always tenured.
    890  if (!JS::RuntimeHeapIsMinorCollecting()) {
    891    availableTimeZones.trace(trc);
    892    ianaZonesTreatedAsLinksByICU.trace(trc);
    893    ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
    894    availableLocales.trace(trc);
    895    collatorAvailableLocales.trace(trc);
    896 #if DEBUG || MOZ_SYSTEM_ICU
    897    upperCaseFirstLocales.trace(trc);
    898    ignorePunctuationLocales.trace(trc);
    899 #endif
    900  }
    901 }
    902 
    903 size_t js::intl::SharedIntlData::sizeOfExcludingThis(
    904    mozilla::MallocSizeOf mallocSizeOf) const {
    905  return availableTimeZones.shallowSizeOfExcludingThis(mallocSizeOf) +
    906         ianaZonesTreatedAsLinksByICU.shallowSizeOfExcludingThis(mallocSizeOf) +
    907         ianaLinksCanonicalizedDifferentlyByICU.shallowSizeOfExcludingThis(
    908             mallocSizeOf) +
    909         availableLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
    910         collatorAvailableLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
    911 #if DEBUG || MOZ_SYSTEM_ICU
    912         upperCaseFirstLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
    913         ignorePunctuationLocales.shallowSizeOfExcludingThis(mallocSizeOf) +
    914 #endif
    915         mallocSizeOf(dateTimePatternGeneratorLocale.get());
    916 }