commit bf6a6388ea10529cf521d485f0d12814e107ebb9
parent b0d1e698fd76803ba7c74eb5b9e8dd3a57c723bf
Author: André Bargull <andre.bargull@gmail.com>
Date: Tue, 16 Dec 2025 18:23:28 +0000
Bug 2005531 - Part 5: Add SupportedLocalesOf C++ implementation. r=spidermonkey-reviewers,dminor
Reimplements `SupportedLocales` from "js/src/builtin/intl/CommonFunctions.js" in C++.
The spec references were copied from the self-hosted JS version. Updating all comments
to match the current spec will happen in a later bug.
Part 7 updates the `Intl` constructors to use this new function.
Part 8 will remove the self-hosted JS function.
Differential Revision: https://phabricator.services.mozilla.com/D276023
Diffstat:
3 files changed, 243 insertions(+), 41 deletions(-)
diff --git a/js/src/builtin/intl/LocaleNegotiation.cpp b/js/src/builtin/intl/LocaleNegotiation.cpp
@@ -11,17 +11,76 @@
#include <algorithm>
#include <iterator>
+#include <stddef.h>
+#include "builtin/Array.h"
+#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/FormatBuffer.h"
#include "builtin/intl/SharedIntlData.h"
#include "builtin/intl/StringAsciiChars.h"
+#include "js/Conversions.h"
#include "js/Result.h"
+#include "vm/ArrayObject.h"
+#include "vm/GlobalObject.h"
#include "vm/JSContext.h"
+#include "vm/Realm.h"
#include "vm/StringType.h"
+#include "vm/NativeObject-inl.h"
+#include "vm/ObjectOperations-inl.h"
+
using namespace js;
using namespace js::intl;
+/**
+ * Debug-only sanity check for |locale|.
+ *
+ * In DEBUG builds this asserts that |locale| is ASCII-only, parses as a
+ * language tag, carries no Unicode extension, and is already canonical, i.e.
+ * canonicalizing and re-serializing the parsed tag yields the same characters.
+ * In non-DEBUG builds the checks are compiled out and the function simply
+ * returns true.
+ *
+ * Returns false if one of the fallible steps (initializing the ASCII
+ * characters, parsing, canonicalizing, serializing) fails.
+ */
+static bool AssertCanonicalLocaleWithoutUnicodeExtension(
+ JSContext* cx, Handle<JSLinearString*> locale) {
+#ifdef DEBUG
+ MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");
+
+ // |locale| is a structurally valid language tag.
+ mozilla::intl::Locale tag;
+
+ using ParserError = mozilla::intl::LocaleParser::ParserError;
+ mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok();
+ {
+ intl::StringAsciiChars chars(locale);
+ if (!chars.init(cx)) {
+ return false;
+ }
+
+ parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag);
+ }
+
+ // The only parse failure tolerated for a valid tag is running out of memory.
+ if (parse_result.isErr()) {
+ MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
+ "locale is a structurally valid language tag");
+
+ intl::ReportInternalError(cx);
+ return false;
+ }
+
+ MOZ_ASSERT(!tag.GetUnicodeExtension(),
+ "locale must contain no Unicode extensions");
+
+ if (auto result = tag.Canonicalize(); result.isErr()) {
+ MOZ_ASSERT(result.unwrapErr() !=
+ mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
+ intl::ReportInternalError(cx);
+ return false;
+ }
+
+ // Serialize the canonicalized tag so it can be compared against the input.
+ intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
+ if (auto result = tag.ToString(buffer); result.isErr()) {
+ intl::ReportInternalError(cx, result.unwrapErr());
+ return false;
+ }
+
+ MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()),
+ "locale is a canonicalized language tag");
+#endif
+ return true;
+}
+
static bool SameOrParentLocale(const JSLinearString* locale,
const JSLinearString* otherLocale) {
// Return true if |locale| is the same locale as |otherLocale|.
@@ -38,7 +97,16 @@ static bool SameOrParentLocale(const JSLinearString* locale,
return false;
}
-// 9.2.2 BestAvailableLocale ( availableLocales, locale )
+/**
+ * 9.2.2 BestAvailableLocale ( availableLocales, locale )
+ *
+ * Compares a BCP 47 language tag against the locales in availableLocales and
+ * returns the best available match. Uses the fallback mechanism of RFC 4647,
+ * section 3.4.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 9.2.2.
+ * Spec: RFC 4647, section 3.4.
+ */
static JS::Result<JSLinearString*> BestAvailableLocale(
JSContext* cx, AvailableLocaleKind availableLocales,
Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale) {
@@ -71,6 +139,10 @@ static JS::Result<JSLinearString*> BestAvailableLocale(
return r - 1;
};
+ if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) {
+ return cx->alreadyReportedError();
+ }
+
// Step 1.
Rooted<JSLinearString*> candidate(cx, locale);
@@ -127,64 +199,180 @@ bool js::intl::BestAvailableLocale(JSContext* cx,
Handle<JSLinearString*> locale,
Handle<JSLinearString*> defaultLocale,
MutableHandle<JSLinearString*> result) {
-#ifdef DEBUG
- {
- MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");
-
- // |locale| is a structurally valid language tag.
- mozilla::intl::Locale tag;
+ JSLinearString* res;
+ JS_TRY_VAR_OR_RETURN_FALSE(
+ cx, res,
+ BestAvailableLocale(cx, availableLocales, locale, defaultLocale));
+ if (res) {
+ result.set(res);
+ } else {
+ result.set(nullptr);
+ }
+ return true;
+}
- using ParserError = mozilla::intl::LocaleParser::ParserError;
- mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok();
- {
- intl::StringAsciiChars chars(locale);
- if (!chars.init(cx)) {
- return false;
+/**
+ * Returns the length of the prefix of |locale| that precedes the first
+ * singleton (single-character) subtag, or |locale.length()| when no such
+ * subtag is found.
+ *
+ * A singleton is recognized as a '-' that is followed two characters later by
+ * another '-'. Every '-' is release-asserted to have at least two characters
+ * after it.
+ */
+template <typename CharT>
+static size_t BaseNameLength(mozilla::Range<const CharT> locale) {
+ // Search for the start of the first singleton subtag.
+ for (size_t i = 0; i < locale.length(); i++) {
+ if (locale[i] == '-') {
+ MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale");
+ if (locale[i + 2] == '-') {
+ return i;
}
-
- parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag);
}
+ }
+ return locale.length();
+}
- if (parse_result.isErr()) {
- MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
- "locale is a structurally valid language tag");
+/**
+ * Overload for linear strings: forwards to the character-range overload that
+ * matches the string's encoding (Latin-1 or two-byte).
+ */
+static size_t BaseNameLength(JSLinearString* locale) {
+ // Token passed to latin1Range()/twoByteRange() below.
+ JS::AutoCheckCannotGC nogc;
+ if (locale->hasLatin1Chars()) {
+ return BaseNameLength(locale->latin1Range(nogc));
+ }
+ return BaseNameLength(locale->twoByteRange(nogc));
+}
- intl::ReportInternalError(cx);
+/**
+ * Returns the subset of requestedLocales for which availableLocales has a
+ * matching (possibly fallback) locale. Locales appear in the same order in the
+ * returned list as in the input list.
+ *
+ * Each requested locale is matched by its base name (everything before the
+ * first singleton subtag), but the original, unmodified locale is what gets
+ * appended to |supportedLocales|.
+ *
+ * |supportedLocales| must be empty on entry. Returns false on failure.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 9.2.7.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.8.
+ */
+static bool LookupSupportedLocales(
+ JSContext* cx, AvailableLocaleKind availableLocales,
+ Handle<LocalesList> requestedLocales,
+ MutableHandle<LocalesList> supportedLocales) {
+ // Step 1.
+ MOZ_ASSERT(supportedLocales.empty());
+
+ Rooted<JSLinearString*> defaultLocale(
+ cx, cx->global()->globalIntlData().defaultLocale(cx));
+ if (!defaultLocale) {
+ return false;
+ }
+
+ // Step 2.
+ Rooted<JSLinearString*> noExtensionsLocale(cx);
+ for (size_t i = 0; i < requestedLocales.length(); i++) {
+ auto locale = requestedLocales[i];
+
+ // Step 2.a.
+ //
+ // Use the base name to ignore any extension sequences.
+ noExtensionsLocale =
+ NewDependentString(cx, locale, 0, BaseNameLength(locale));
+ if (!noExtensionsLocale) {
return false;
}
- MOZ_ASSERT(!tag.GetUnicodeExtension(),
- "locale must contain no Unicode extensions");
+ // Step 2.b.
+ //
+ // The result is only tested for null below, so it is kept in a plain
+ // pointer local to this iteration.
+ JSLinearString* availableLocale;
+ JS_TRY_VAR_OR_RETURN_FALSE(
+ cx, availableLocale,
+ BestAvailableLocale(cx, availableLocales, noExtensionsLocale,
+ defaultLocale));
+
+ // Step 2.c.
+ if (availableLocale) {
+ if (!supportedLocales.append(locale)) {
+ return false;
+ }
+ }
+ }
+
+ // Step 3.
+ return true;
+}
- if (auto result = tag.Canonicalize(); result.isErr()) {
- MOZ_ASSERT(
- result.unwrapErr() !=
- mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
- intl::ReportInternalError(cx);
+/**
+ * Validates the "localeMatcher" option and then returns the subset of
+ * requestedLocales for which availableLocales has a matching (possibly
+ * fallback) locale. Locales appear in the same order in the returned list as
+ * in the input list.
+ *
+ * If |options| is not undefined, it is converted to an object and its
+ * "localeMatcher" property is read. A value other than undefined must
+ * stringify to "lookup" or "best fit"; otherwise the function fails, reporting
+ * JSMSG_INVALID_LOCALE_MATCHER when the offending value can be quoted. Both
+ * accepted values lead to the same lookup-based matching.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 9.2.9.
+ */
+static bool SupportedLocales(JSContext* cx,
+ AvailableLocaleKind availableLocales,
+ Handle<LocalesList> requestedLocales,
+ Handle<Value> options,
+ MutableHandle<LocalesList> supportedLocales) {
+ // Step 1.
+ if (!options.isUndefined()) {
+ // Step 1.a.
+ Rooted<JSObject*> obj(cx, ToObject(cx, options));
+ if (!obj) {
return false;
}
- intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
- if (auto result = tag.ToString(buffer); result.isErr()) {
- intl::ReportInternalError(cx, result.unwrapErr());
+ // Step 1.b.
+ Rooted<Value> localeMatcher(cx);
+ if (!GetProperty(cx, obj, obj, cx->names().localeMatcher, &localeMatcher)) {
return false;
}
- MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()),
- "locale is a canonicalized language tag");
+ if (!localeMatcher.isUndefined()) {
+ JSString* str = ToString(cx, localeMatcher);
+ if (!str) {
+ return false;
+ }
+
+ JSLinearString* linear = str->ensureLinear(cx);
+ if (!linear) {
+ return false;
+ }
+
+ if (!StringEqualsLiteral(linear, "lookup") &&
+ !StringEqualsLiteral(linear, "best fit")) {
+ // Whether or not quoting succeeds, the call fails.
+ if (auto chars = QuoteString(cx, linear)) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_INVALID_LOCALE_MATCHER, chars.get());
+ }
+ return false;
+ }
+ }
}
-#endif
- JSLinearString* res;
- JS_TRY_VAR_OR_RETURN_FALSE(
- cx, res,
- BestAvailableLocale(cx, availableLocales, locale, defaultLocale));
- if (res) {
- result.set(res);
- } else {
- result.set(nullptr);
+ // Steps 2-5.
+ //
+ // We don't yet support anything better than the lookup matcher.
+ return LookupSupportedLocales(cx, availableLocales, requestedLocales,
+ supportedLocales);
+}
+
+/**
+ * Creates a new array whose dense elements are the strings in |locales|, in
+ * the same order. Returns nullptr if the array can't be allocated.
+ */
+static ArrayObject* LocalesListToArray(JSContext* cx,
+ Handle<LocalesList> locales) {
+ auto* array = NewDenseFullyAllocatedArray(cx, locales.length());
+ if (!array) {
+ return nullptr;
}
- return true;
+ // Set the initialized length up front; every element is filled in below.
+ array->setDenseInitializedLength(locales.length());
+
+ for (size_t i = 0; i < locales.length(); i++) {
+ array->initDenseElement(i, StringValue(locales[i]));
+ }
+ return array;
+}
+
+/**
+ * Canonicalizes |locales| into a list of requested locales, filters that list
+ * through SupportedLocales() using |availableLocales| and |options|, and
+ * returns the remaining locales as a new array. Returns nullptr if any step
+ * fails.
+ */
+ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx,
+ AvailableLocaleKind availableLocales,
+ Handle<Value> locales,
+ Handle<Value> options) {
+ Rooted<LocalesList> requestedLocales(cx, cx);
+ if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
+ return nullptr;
+ }
+
+ Rooted<LocalesList> supportedLocales(cx, cx);
+ if (!SupportedLocales(cx, availableLocales, requestedLocales, options,
+ &supportedLocales)) {
+ return nullptr;
+ }
+
+ return LocalesListToArray(cx, supportedLocales);
}
JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) {
diff --git a/js/src/builtin/intl/LocaleNegotiation.h b/js/src/builtin/intl/LocaleNegotiation.h
@@ -12,6 +12,10 @@
class JSLinearString;
+namespace js {
+class ArrayObject;
+}
+
namespace js::intl {
enum class AvailableLocaleKind;
@@ -46,6 +50,15 @@ bool BestAvailableLocale(JSContext* cx, AvailableLocaleKind availableLocales,
JS::MutableHandle<JSLinearString*> result);
/**
+ * Return a new array holding those locales from |locales| that are supported
+ * according to |availableLocales|. |options| may carry a "localeMatcher"
+ * property. Returns nullptr on failure.
+ */
+ArrayObject* SupportedLocalesOf(JSContext* cx,
+ AvailableLocaleKind availableLocales,
+ JS::Handle<JS::Value> locales,
+ JS::Handle<JS::Value> options);
+
+/**
* Return the supported locale for the default locale if ICU supports that
* default locale (perhaps via fallback, e.g. supporting "de-CH" through "de"
* support implied by a "de-DE" locale). Otherwise uses the last-ditch locale.
diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h
@@ -305,6 +305,7 @@
MACRO_(literal, "literal") \
MACRO_(loc, "loc") \
MACRO_(locale, "locale") \
+ MACRO_(localeMatcher, "localeMatcher") \
MACRO_(many, "many") \
MACRO_(MapConstructorInit, "MapConstructorInit") \
MACRO_(MapIteratorNext, "MapIteratorNext") \