tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit e356b422d4814182e7cdda7aeff5096531885db3
parent d9e12a778949416e27f2cbf29ad19f1b421e9201
Author: André Bargull <andre.bargull@gmail.com>
Date:   Fri, 12 Dec 2025 17:38:15 +0000

Bug 2005531 - Part 6: Add SupportedLocalesOf C++ implementation. r=spidermonkey-reviewers,dminor

Reimplements `SupportedLocales` from "js/src/builtin/intl/CommonFunctions.js" in C++.

The spec references were copied from the self-hosted JS version. Updating all comments
to match the current spec will happen in a later bug.

Part 7 updates the `Intl` constructors to use this new function.

Part 8 will remove the self-hosted JS function.

Differential Revision: https://phabricator.services.mozilla.com/D276023

Diffstat:
M js/src/builtin/intl/LocaleNegotiation.cpp | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M js/src/builtin/intl/LocaleNegotiation.h | 13 +++++++++++++
M js/src/vm/CommonPropertyNames.h | 1 +
3 files changed, 243 insertions(+), 41 deletions(-)

diff --git a/js/src/builtin/intl/LocaleNegotiation.cpp b/js/src/builtin/intl/LocaleNegotiation.cpp @@ -11,17 +11,76 @@ #include <algorithm> #include <iterator> +#include <stddef.h> +#include "builtin/Array.h" +#include "builtin/intl/CommonFunctions.h" #include "builtin/intl/FormatBuffer.h" #include "builtin/intl/SharedIntlData.h" #include "builtin/intl/StringAsciiChars.h" +#include "js/Conversions.h" #include "js/Result.h" +#include "vm/ArrayObject.h" +#include "vm/GlobalObject.h" #include "vm/JSContext.h" +#include "vm/Realm.h" #include "vm/StringType.h" +#include "vm/NativeObject-inl.h" +#include "vm/ObjectOperations-inl.h" + using namespace js; using namespace js::intl; +static bool AssertCanonicalLocaleWithoutUnicodeExtension( + JSContext* cx, Handle<JSLinearString*> locale) { +#ifdef DEBUG + MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only"); + + // |locale| is a structurally valid language tag. + mozilla::intl::Locale tag; + + using ParserError = mozilla::intl::LocaleParser::ParserError; + mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok(); + { + intl::StringAsciiChars chars(locale); + if (!chars.init(cx)) { + return false; + } + + parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag); + } + + if (parse_result.isErr()) { + MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory, + "locale is a structurally valid language tag"); + + intl::ReportInternalError(cx); + return false; + } + + MOZ_ASSERT(!tag.GetUnicodeExtension(), + "locale must contain no Unicode extensions"); + + if (auto result = tag.Canonicalize(); result.isErr()) { + MOZ_ASSERT(result.unwrapErr() != + mozilla::intl::Locale::CanonicalizationError::DuplicateVariant); + intl::ReportInternalError(cx); + return false; + } + + intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); + if (auto result = tag.ToString(buffer); result.isErr()) { + intl::ReportInternalError(cx, result.unwrapErr()); + return false; + } + + 
MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()), + "locale is a canonicalized language tag"); +#endif + return true; +} + static bool SameOrParentLocale(const JSLinearString* locale, const JSLinearString* otherLocale) { // Return true if |locale| is the same locale as |otherLocale|. @@ -38,7 +97,16 @@ static bool SameOrParentLocale(const JSLinearString* locale, return false; } -// 9.2.2 BestAvailableLocale ( availableLocales, locale ) +/** + * 9.2.2 BestAvailableLocale ( availableLocales, locale ) + * + * Compares a BCP 47 language tag against the locales in availableLocales and + * returns the best available match. Uses the fallback mechanism of RFC 4647, + * section 3.4. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.2. + * Spec: RFC 4647, section 3.4. + */ static JS::Result<JSLinearString*> BestAvailableLocale( JSContext* cx, AvailableLocaleKind availableLocales, Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale) { @@ -71,6 +139,10 @@ static JS::Result<JSLinearString*> BestAvailableLocale( return r - 1; }; + if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) { + return cx->alreadyReportedError(); + } + // Step 1. Rooted<JSLinearString*> candidate(cx, locale); @@ -127,64 +199,180 @@ bool js::intl::BestAvailableLocale(JSContext* cx, Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale, MutableHandle<JSLinearString*> result) { -#ifdef DEBUG - { - MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only"); - - // |locale| is a structurally valid language tag. 
- mozilla::intl::Locale tag; + JSLinearString* res; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, res, + BestAvailableLocale(cx, availableLocales, locale, defaultLocale)); + if (res) { + result.set(res); + } else { + result.set(nullptr); + } + return true; +} - using ParserError = mozilla::intl::LocaleParser::ParserError; - mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok(); - { - intl::StringAsciiChars chars(locale); - if (!chars.init(cx)) { - return false; +template <typename CharT> +static size_t BaseNameLength(mozilla::Range<const CharT> locale) { + // Search for the start of the first singleton subtag. + for (size_t i = 0; i < locale.length(); i++) { + if (locale[i] == '-') { + MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale"); + if (locale[i + 2] == '-') { + return i; } - - parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag); } + } + return locale.length(); +} - if (parse_result.isErr()) { - MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory, - "locale is a structurally valid language tag"); +static size_t BaseNameLength(JSLinearString* locale) { + JS::AutoCheckCannotGC nogc; + if (locale->hasLatin1Chars()) { + return BaseNameLength(locale->latin1Range(nogc)); + } + return BaseNameLength(locale->twoByteRange(nogc)); +} - intl::ReportInternalError(cx); +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.7. + * Spec: ECMAScript Internationalization API Specification, 9.2.8. + */ +static bool LookupSupportedLocales( + JSContext* cx, AvailableLocaleKind availableLocales, + Handle<LocalesList> requestedLocales, + MutableHandle<LocalesList> supportedLocales) { + // Step 1. 
+ MOZ_ASSERT(supportedLocales.empty()); + + Rooted<JSLinearString*> defaultLocale( + cx, cx->global()->globalIntlData().defaultLocale(cx)); + if (!defaultLocale) { + return false; + } + + // Step 2. + Rooted<JSLinearString*> noExtensionsLocale(cx); + Rooted<JSLinearString*> availableLocale(cx); + for (size_t i = 0; i < requestedLocales.length(); i++) { + auto locale = requestedLocales[i]; + + // Step 2.a. + // + // Use the base name to ignore any extension sequences. + noExtensionsLocale = + NewDependentString(cx, locale, 0, BaseNameLength(locale)); + if (!noExtensionsLocale) { return false; } - MOZ_ASSERT(!tag.GetUnicodeExtension(), - "locale must contain no Unicode extensions"); + // Step 2.b. + JSLinearString* availableLocale; + JS_TRY_VAR_OR_RETURN_FALSE( + cx, availableLocale, + BestAvailableLocale(cx, availableLocales, noExtensionsLocale, + defaultLocale)); + + // Step 2.c. + if (availableLocale) { + if (!supportedLocales.append(locale)) { + return false; + } + } + } + + // Step 3. + return true; +} - if (auto result = tag.Canonicalize(); result.isErr()) { - MOZ_ASSERT( - result.unwrapErr() != - mozilla::intl::Locale::CanonicalizationError::DuplicateVariant); - intl::ReportInternalError(cx); +/** + * Returns the subset of requestedLocales for which availableLocales has a + * matching (possibly fallback) locale. Locales appear in the same order in the + * returned list as in the input list. + * + * Spec: ECMAScript Internationalization API Specification, 9.2.9. + */ +static bool SupportedLocales(JSContext* cx, + AvailableLocaleKind availableLocales, + Handle<LocalesList> requestedLocales, + Handle<Value> options, + MutableHandle<LocalesList> supportedLocales) { + // Step 1. + if (!options.isUndefined()) { + // Step 1.a. 
+ Rooted<JSObject*> obj(cx, ToObject(cx, options)); + if (!obj) { return false; } - intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); - if (auto result = tag.ToString(buffer); result.isErr()) { - intl::ReportInternalError(cx, result.unwrapErr()); + // Step 1.b. + Rooted<Value> localeMatcher(cx); + if (!GetProperty(cx, obj, obj, cx->names().localeMatcher, &localeMatcher)) { return false; } - MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()), - "locale is a canonicalized language tag"); + if (!localeMatcher.isUndefined()) { + JSString* str = ToString(cx, localeMatcher); + if (!str) { + return false; + } + + JSLinearString* linear = str->ensureLinear(cx); + if (!linear) { + return false; + } + + if (!StringEqualsLiteral(linear, "lookup") && + !StringEqualsLiteral(linear, "best fit")) { + if (auto chars = QuoteString(cx, linear)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_INVALID_LOCALE_MATCHER, chars.get()); + } + return false; + } + } } -#endif - JSLinearString* res; - JS_TRY_VAR_OR_RETURN_FALSE( - cx, res, - BestAvailableLocale(cx, availableLocales, locale, defaultLocale)); - if (res) { - result.set(res); - } else { - result.set(nullptr); + // Steps 2-5. + // + // We don't yet support anything better than the lookup matcher. 
+ return LookupSupportedLocales(cx, availableLocales, requestedLocales, + supportedLocales); +} + +static ArrayObject* LocalesListToArray(JSContext* cx, + Handle<LocalesList> locales) { + auto* array = NewDenseFullyAllocatedArray(cx, locales.length()); + if (!array) { + return nullptr; } - return true; + array->setDenseInitializedLength(locales.length()); + + for (size_t i = 0; i < locales.length(); i++) { + array->initDenseElement(i, StringValue(locales[i])); + } + return array; +} + +ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx, + AvailableLocaleKind availableLocales, + Handle<Value> locales, + Handle<Value> options) { + Rooted<LocalesList> requestedLocales(cx, cx); + if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) { + return nullptr; + } + + Rooted<LocalesList> supportedLocales(cx, cx); + if (!SupportedLocales(cx, availableLocales, requestedLocales, options, + &supportedLocales)) { + return nullptr; + } + + return LocalesListToArray(cx, supportedLocales); } JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) { diff --git a/js/src/builtin/intl/LocaleNegotiation.h b/js/src/builtin/intl/LocaleNegotiation.h @@ -12,6 +12,10 @@ class JSLinearString; +namespace js { +class ArrayObject; +} + namespace js::intl { enum class AvailableLocaleKind; @@ -46,6 +50,15 @@ bool BestAvailableLocale(JSContext* cx, AvailableLocaleKind availableLocales, JS::MutableHandle<JSLinearString*> result); /** + * Return the supported locales in |locales| which are supported according to + * |availableLocales|. + */ +ArrayObject* SupportedLocalesOf(JSContext* cx, + AvailableLocaleKind availableLocales, + JS::Handle<JS::Value> locales, + JS::Handle<JS::Value> options); + +/** * Return the supported locale for the default locale if ICU supports that * default locale (perhaps via fallback, e.g. supporting "de-CH" through "de" * support implied by a "de-DE" locale). Otherwise uses the last-ditch locale. 
diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h @@ -303,6 +303,7 @@ MACRO_(literal, "literal") \ MACRO_(loc, "loc") \ MACRO_(locale, "locale") \ + MACRO_(localeMatcher, "localeMatcher") \ MACRO_(many, "many") \ MACRO_(MapConstructorInit, "MapConstructorInit") \ MACRO_(MapIteratorNext, "MapIteratorNext") \