tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

LocaleNegotiation.cpp (16125B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "builtin/intl/LocaleNegotiation.h"
      8 
      9 #include "mozilla/Assertions.h"
     10 #include "mozilla/intl/Locale.h"
     11 
     12 #include <algorithm>
     13 #include <iterator>
     14 #include <stddef.h>
     15 
     16 #include "builtin/Array.h"
     17 #include "builtin/intl/CommonFunctions.h"
     18 #include "builtin/intl/FormatBuffer.h"
     19 #include "builtin/intl/SharedIntlData.h"
     20 #include "builtin/intl/StringAsciiChars.h"
     21 #include "js/Conversions.h"
     22 #include "js/Result.h"
     23 #include "vm/ArrayObject.h"
     24 #include "vm/GlobalObject.h"
     25 #include "vm/JSContext.h"
     26 #include "vm/Realm.h"
     27 #include "vm/StringType.h"
     28 
     29 #include "vm/NativeObject-inl.h"
     30 #include "vm/ObjectOperations-inl.h"
     31 
     32 using namespace js;
     33 using namespace js::intl;
     34 
     35 static bool AssertCanonicalLocaleWithoutUnicodeExtension(
     36    JSContext* cx, Handle<JSLinearString*> locale) {
     37 #ifdef DEBUG
     38  MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");
     39 
     40  // |locale| is a structurally valid language tag.
     41  mozilla::intl::Locale tag;
     42 
     43  using ParserError = mozilla::intl::LocaleParser::ParserError;
     44  mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok();
     45  {
     46    intl::StringAsciiChars chars(locale);
     47    if (!chars.init(cx)) {
     48      return false;
     49    }
     50 
     51    parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag);
     52  }
     53 
     54  if (parse_result.isErr()) {
     55    MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
     56               "locale is a structurally valid language tag");
     57 
     58    intl::ReportInternalError(cx);
     59    return false;
     60  }
     61 
     62  MOZ_ASSERT(!tag.GetUnicodeExtension(),
     63             "locale must contain no Unicode extensions");
     64 
     65  if (auto result = tag.Canonicalize(); result.isErr()) {
     66    MOZ_ASSERT(result.unwrapErr() !=
     67               mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
     68    intl::ReportInternalError(cx);
     69    return false;
     70  }
     71 
     72  intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
     73  if (auto result = tag.ToString(buffer); result.isErr()) {
     74    intl::ReportInternalError(cx, result.unwrapErr());
     75    return false;
     76  }
     77 
     78  MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()),
     79             "locale is a canonicalized language tag");
     80 #endif
     81  return true;
     82 }
     83 
     84 static bool SameOrParentLocale(const JSLinearString* locale,
     85                               const JSLinearString* otherLocale) {
     86  // Return true if |locale| is the same locale as |otherLocale|.
     87  if (locale->length() == otherLocale->length()) {
     88    return EqualStrings(locale, otherLocale);
     89  }
     90 
     91  // Also return true if |locale| is the parent locale of |otherLocale|.
     92  if (locale->length() < otherLocale->length()) {
     93    return HasSubstringAt(otherLocale, locale, 0) &&
     94           otherLocale->latin1OrTwoByteChar(locale->length()) == '-';
     95  }
     96 
     97  return false;
     98 }
     99 
    100 /**
    101 * 9.2.2 BestAvailableLocale ( availableLocales, locale )
    102 *
    103 * Compares a BCP 47 language tag against the locales in availableLocales and
    104 * returns the best available match. Uses the fallback mechanism of RFC 4647,
    105 * section 3.4.
    106 *
    107 * Spec: ECMAScript Internationalization API Specification, 9.2.2.
    108 * Spec: RFC 4647, section 3.4.
    109 */
    110 static JS::Result<JSLinearString*> BestAvailableLocale(
    111    JSContext* cx, AvailableLocaleKind availableLocales,
    112    Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale) {
    113  // In the spec, [[availableLocales]] is formally a list of all available
    114  // locales. But in our implementation, it's an *incomplete* list, not
    115  // necessarily including the default locale (and all locales implied by it,
    116  // e.g. "de" implied by "de-CH"), if that locale isn't in every
    117  // [[availableLocales]] list (because that locale is supported through
    118  // fallback, e.g. "de-CH" supported through "de").
    119  //
    120  // If we're considering the default locale, augment the spec loop with
    121  // additional checks to also test whether the current prefix is a prefix of
    122  // the default locale.
    123 
    124  intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();
    125 
    126  auto findLast = [](const auto* chars, size_t length) {
    127    auto rbegin = std::make_reverse_iterator(chars + length);
    128    auto rend = std::make_reverse_iterator(chars);
    129    auto p = std::find(rbegin, rend, '-');
    130 
    131    // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you
    132    // find easier to reason about when using reserve iterators.
    133    ptrdiff_t r = std::distance(chars, p.base());
    134    MOZ_ASSERT(r == std::distance(p, rend));
    135 
    136    // But always subtract one to convert from the reverse iterator result to
    137    // the correspoding forward iterator value, because reserve iterators point
    138    // to one element past the forward iterator value.
    139    return r - 1;
    140  };
    141 
    142  if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) {
    143    return cx->alreadyReportedError();
    144  }
    145 
    146  // Step 1.
    147  Rooted<JSLinearString*> candidate(cx, locale);
    148 
    149  // Step 2.
    150  while (true) {
    151    // Step 2.a.
    152    bool supported = false;
    153    if (!sharedIntlData.isAvailableLocale(cx, availableLocales, candidate,
    154                                          &supported)) {
    155      return cx->alreadyReportedError();
    156    }
    157    if (supported) {
    158      return candidate.get();
    159    }
    160 
    161    if (defaultLocale && SameOrParentLocale(candidate, defaultLocale)) {
    162      return candidate.get();
    163    }
    164 
    165    // Step 2.b.
    166    ptrdiff_t pos;
    167    if (candidate->hasLatin1Chars()) {
    168      JS::AutoCheckCannotGC nogc;
    169      pos = findLast(candidate->latin1Chars(nogc), candidate->length());
    170    } else {
    171      JS::AutoCheckCannotGC nogc;
    172      pos = findLast(candidate->twoByteChars(nogc), candidate->length());
    173    }
    174 
    175    if (pos < 0) {
    176      return nullptr;
    177    }
    178 
    179    // Step 2.c.
    180    size_t length = size_t(pos);
    181    if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') {
    182      length -= 2;
    183    }
    184 
    185    // Step 2.d.
    186    candidate = NewDependentString(cx, candidate, 0, length);
    187    if (!candidate) {
    188      return cx->alreadyReportedError();
    189    }
    190  }
    191 }
    192 
    193 // 9.2.2 BestAvailableLocale ( availableLocales, locale )
    194 //
    195 // Carries an additional third argument in our implementation to provide the
    196 // default locale. See the doc-comment in the header file.
    197 bool js::intl::BestAvailableLocale(JSContext* cx,
    198                                   AvailableLocaleKind availableLocales,
    199                                   Handle<JSLinearString*> locale,
    200                                   Handle<JSLinearString*> defaultLocale,
    201                                   MutableHandle<JSLinearString*> result) {
    202  JSLinearString* res;
    203  JS_TRY_VAR_OR_RETURN_FALSE(
    204      cx, res,
    205      BestAvailableLocale(cx, availableLocales, locale, defaultLocale));
    206  if (res) {
    207    result.set(res);
    208  } else {
    209    result.set(nullptr);
    210  }
    211  return true;
    212 }
    213 
    214 template <typename CharT>
    215 static size_t BaseNameLength(mozilla::Range<const CharT> locale) {
    216  // Search for the start of the first singleton subtag.
    217  for (size_t i = 0; i < locale.length(); i++) {
    218    if (locale[i] == '-') {
    219      MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale");
    220      if (locale[i + 2] == '-') {
    221        return i;
    222      }
    223    }
    224  }
    225  return locale.length();
    226 }
    227 
    228 static size_t BaseNameLength(JSLinearString* locale) {
    229  JS::AutoCheckCannotGC nogc;
    230  if (locale->hasLatin1Chars()) {
    231    return BaseNameLength(locale->latin1Range(nogc));
    232  }
    233  return BaseNameLength(locale->twoByteRange(nogc));
    234 }
    235 
    236 /**
    237 * Returns the subset of requestedLocales for which availableLocales has a
    238 * matching (possibly fallback) locale. Locales appear in the same order in the
    239 * returned list as in the input list.
    240 *
    241 * Spec: ECMAScript Internationalization API Specification, 9.2.7.
    242 * Spec: ECMAScript Internationalization API Specification, 9.2.8.
    243 */
    244 static bool LookupSupportedLocales(
    245    JSContext* cx, AvailableLocaleKind availableLocales,
    246    Handle<LocalesList> requestedLocales,
    247    MutableHandle<LocalesList> supportedLocales) {
    248  // Step 1.
    249  MOZ_ASSERT(supportedLocales.empty());
    250 
    251  Rooted<JSLinearString*> defaultLocale(
    252      cx, cx->global()->globalIntlData().defaultLocale(cx));
    253  if (!defaultLocale) {
    254    return false;
    255  }
    256 
    257  // Step 2.
    258  Rooted<JSLinearString*> noExtensionsLocale(cx);
    259  Rooted<JSLinearString*> availableLocale(cx);
    260  for (size_t i = 0; i < requestedLocales.length(); i++) {
    261    auto locale = requestedLocales[i];
    262 
    263    // Step 2.a.
    264    //
    265    // Use the base name to ignore any extension sequences.
    266    noExtensionsLocale =
    267        NewDependentString(cx, locale, 0, BaseNameLength(locale));
    268    if (!noExtensionsLocale) {
    269      return false;
    270    }
    271 
    272    // Step 2.b.
    273    JSLinearString* availableLocale;
    274    JS_TRY_VAR_OR_RETURN_FALSE(
    275        cx, availableLocale,
    276        BestAvailableLocale(cx, availableLocales, noExtensionsLocale,
    277                            defaultLocale));
    278 
    279    // Step 2.c.
    280    if (availableLocale) {
    281      if (!supportedLocales.append(locale)) {
    282        return false;
    283      }
    284    }
    285  }
    286 
    287  // Step 3.
    288  return true;
    289 }
    290 
    291 /**
    292 * Returns the subset of requestedLocales for which availableLocales has a
    293 * matching (possibly fallback) locale. Locales appear in the same order in the
    294 * returned list as in the input list.
    295 *
    296 * Spec: ECMAScript Internationalization API Specification, 9.2.9.
    297 */
    298 static bool SupportedLocales(JSContext* cx,
    299                             AvailableLocaleKind availableLocales,
    300                             Handle<LocalesList> requestedLocales,
    301                             Handle<Value> options,
    302                             MutableHandle<LocalesList> supportedLocales) {
    303  // Step 1.
    304  if (!options.isUndefined()) {
    305    // Step 1.a.
    306    Rooted<JSObject*> obj(cx, ToObject(cx, options));
    307    if (!obj) {
    308      return false;
    309    }
    310 
    311    // Step 1.b.
    312    Rooted<Value> localeMatcher(cx);
    313    if (!GetProperty(cx, obj, obj, cx->names().localeMatcher, &localeMatcher)) {
    314      return false;
    315    }
    316 
    317    if (!localeMatcher.isUndefined()) {
    318      JSString* str = ToString(cx, localeMatcher);
    319      if (!str) {
    320        return false;
    321      }
    322 
    323      JSLinearString* linear = str->ensureLinear(cx);
    324      if (!linear) {
    325        return false;
    326      }
    327 
    328      if (!StringEqualsLiteral(linear, "lookup") &&
    329          !StringEqualsLiteral(linear, "best fit")) {
    330        if (auto chars = QuoteString(cx, linear)) {
    331          JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
    332                                    JSMSG_INVALID_LOCALE_MATCHER, chars.get());
    333        }
    334        return false;
    335      }
    336    }
    337  }
    338 
    339  // Steps 2-5.
    340  //
    341  // We don't yet support anything better than the lookup matcher.
    342  return LookupSupportedLocales(cx, availableLocales, requestedLocales,
    343                                supportedLocales);
    344 }
    345 
    346 ArrayObject* js::intl::LocalesListToArray(JSContext* cx,
    347                                          Handle<LocalesList> locales) {
    348  auto* array = NewDenseFullyAllocatedArray(cx, locales.length());
    349  if (!array) {
    350    return nullptr;
    351  }
    352  array->setDenseInitializedLength(locales.length());
    353 
    354  for (size_t i = 0; i < locales.length(); i++) {
    355    array->initDenseElement(i, StringValue(locales[i]));
    356  }
    357  return array;
    358 }
    359 
    360 ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx,
    361                                          AvailableLocaleKind availableLocales,
    362                                          Handle<Value> locales,
    363                                          Handle<Value> options) {
    364  Rooted<LocalesList> requestedLocales(cx, cx);
    365  if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
    366    return nullptr;
    367  }
    368 
    369  Rooted<LocalesList> supportedLocales(cx, cx);
    370  if (!SupportedLocales(cx, availableLocales, requestedLocales, options,
    371                        &supportedLocales)) {
    372    return nullptr;
    373  }
    374 
    375  return LocalesListToArray(cx, supportedLocales);
    376 }
    377 
    378 JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) {
    379  const char* locale = cx->realm()->getLocale();
    380  if (!locale) {
    381    ReportOutOfMemory(cx);
    382    return nullptr;
    383  }
    384 
    385  auto span = mozilla::MakeStringSpan(locale);
    386 
    387  mozilla::intl::Locale tag;
    388  bool canParseLocale =
    389      mozilla::intl::LocaleParser::TryParse(span, tag).isOk() &&
    390      tag.Canonicalize().isOk();
    391 
    392  Rooted<JSLinearString*> candidate(cx);
    393  if (!canParseLocale) {
    394    candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale());
    395    if (!candidate) {
    396      return nullptr;
    397    }
    398  } else {
    399    // The default locale must be in [[AvailableLocales]], and that list must
    400    // not contain any locales with Unicode extension sequences, so remove any
    401    // present in the candidate.
    402    tag.ClearUnicodeExtension();
    403 
    404    intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
    405    if (auto result = tag.ToString(buffer); result.isErr()) {
    406      intl::ReportInternalError(cx, result.unwrapErr());
    407      return nullptr;
    408    }
    409 
    410    candidate = buffer.toAsciiString(cx);
    411    if (!candidate) {
    412      return nullptr;
    413    }
    414 
    415    // Certain old-style language tags lack a script code, but in current
    416    // usage they *would* include a script code. Map these over to modern
    417    // forms.
    418    for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) {
    419      const char* oldStyle = mapping.oldStyle;
    420      const char* modernStyle = mapping.modernStyle;
    421 
    422      if (StringEqualsAscii(candidate, oldStyle)) {
    423        candidate = NewStringCopyZ<CanGC>(cx, modernStyle);
    424        if (!candidate) {
    425          return nullptr;
    426        }
    427        break;
    428      }
    429    }
    430  }
    431 
    432  // 9.1 Internal slots of Service Constructors
    433  //
    434  // - [[AvailableLocales]] is a List [...]. The list must include the value
    435  //   returned by the DefaultLocale abstract operation (6.2.4), [...].
    436  //
    437  // That implies we must ignore any candidate which isn't supported by all
    438  // Intl service constructors.
    439 
    440  Rooted<JSLinearString*> supportedCollator(cx);
    441  JS_TRY_VAR_OR_RETURN_NULL(
    442      cx, supportedCollator,
    443      BestAvailableLocale(cx, AvailableLocaleKind::Collator, candidate,
    444                          nullptr));
    445 
    446  Rooted<JSLinearString*> supportedDateTimeFormat(cx);
    447  JS_TRY_VAR_OR_RETURN_NULL(
    448      cx, supportedDateTimeFormat,
    449      BestAvailableLocale(cx, AvailableLocaleKind::DateTimeFormat, candidate,
    450                          nullptr));
    451 
    452 #ifdef DEBUG
    453  // Note: We don't test the supported locales of the remaining Intl service
    454  // constructors, because the set of supported locales is exactly equal to
    455  // the set of supported locales of Intl.DateTimeFormat.
    456  for (auto kind : {
    457           AvailableLocaleKind::DisplayNames,
    458           AvailableLocaleKind::DurationFormat,
    459           AvailableLocaleKind::ListFormat,
    460           AvailableLocaleKind::NumberFormat,
    461           AvailableLocaleKind::PluralRules,
    462           AvailableLocaleKind::RelativeTimeFormat,
    463           AvailableLocaleKind::Segmenter,
    464       }) {
    465    JSLinearString* supported;
    466    JS_TRY_VAR_OR_RETURN_NULL(
    467        cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr));
    468 
    469    MOZ_ASSERT(!!supported == !!supportedDateTimeFormat);
    470    MOZ_ASSERT_IF(supported, EqualStrings(supported, supportedDateTimeFormat));
    471  }
    472 #endif
    473 
    474  // Accept the candidate locale if it is supported by all Intl service
    475  // constructors.
    476  if (supportedCollator && supportedDateTimeFormat) {
    477    // Use the actually supported locale instead of the candidate locale. For
    478    // example when the candidate locale "en-US-posix" is supported through
    479    // "en-US", use "en-US" as the default locale.
    480    //
    481    // Also prefer the supported locale with more subtags. For example when
    482    // requesting "de-CH" and Intl.DateTimeFormat supports "de-CH", but
    483    // Intl.Collator only "de", still return "de-CH" as the result.
    484    if (SameOrParentLocale(supportedCollator, supportedDateTimeFormat)) {
    485      return supportedDateTimeFormat;
    486    }
    487    return supportedCollator;
    488  }
    489 
    490  // Return the last ditch locale if the candidate locale isn't supported.
    491  return NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale());
    492 }