commit 149bb3d3bd6e1f7229760fa153572a3ebb27c76d
parent 108d654abff40871cf52b549469502fc6a985e1d
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 13 Oct 2025 12:55:24 +0000
Bug 1990248 - Part 3: Port locale-sensitive case conversion methods to C++. r=spidermonkey-reviewers,jandem
Differential Revision: https://phabricator.services.mozilla.com/D265825
Diffstat:
4 files changed, 64 insertions(+), 230 deletions(-)
diff --git a/js/src/builtin/String.cpp b/js/src/builtin/String.cpp
@@ -33,6 +33,7 @@
# include "builtin/intl/CommonFunctions.h"
# include "builtin/intl/FormatBuffer.h"
# include "builtin/intl/GlobalIntlData.h"
+# include "builtin/intl/Locale.h"
#endif
#include "builtin/RegExp.h"
#include "gc/GC.h"
@@ -943,15 +944,7 @@ static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
}
#if JS_HAS_INTL_API
-// String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
-// with core functionality performed by the intrinsic below.
-
-static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
- JSLinearString* locale = str->ensureLinear(cx);
- if (!locale) {
- return nullptr;
- }
-
+static const char* CaseMappingLocale(JSLinearString* locale) {
MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
// Lithuanian, Turkish, and Azeri have language dependent case mappings.
@@ -969,38 +962,47 @@ static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
}
}
- return ""; // ICU root locale
+ return nullptr;
}
-static bool HasDefaultCasing(const char* locale) { return !strcmp(locale, ""); }
+enum class TargetCase { Lower, Upper };
-bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
- CallArgs args = CallArgsFromVp(argc, vp);
- MOZ_ASSERT(args.length() == 2);
- MOZ_ASSERT(args[0].isString());
- MOZ_ASSERT(args[1].isString());
-
- RootedString string(cx, args[0].toString());
+/**
+ * TransformCase ( S, locales, targetCase )
+ *
+ * Shared implementation of String.prototype.toLocaleLowerCase and
+ * String.prototype.toLocaleUpperCase when the Intl API is available.
+ *
+ * Canonicalizes |locales|, selects the first requested locale (or the default
+ * locale when the list is empty), and converts |string| to |targetCase|.
+ * When CaseMappingLocale returns nullptr for the selected locale, the
+ * language-independent StringToLowerCase/StringToUpperCase functions are used;
+ * otherwise the conversion is delegated to mozilla::intl::String.
+ *
+ * Returns nullptr on failure.
+ */
+static JSLinearString* TransformCase(JSContext* cx, Handle<JSString*> string,
+ Handle<Value> locales,
+ TargetCase targetCase) {
+ // Step 1.
+ Rooted<intl::LocalesList> requestedLocales(cx, cx);
+ if (!intl::CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
+ return nullptr;
+ }
- const char* locale = CaseMappingLocale(cx, args[1].toString());
- if (!locale) {
- return false;
+ // Trivial case: When the input is empty, directly return the empty string.
+ // This happens after step 1 so that an invalid |locales| argument is still
+ // reported for empty inputs.
+ if (string->empty()) {
+ return cx->emptyString();
}
- // Call String.prototype.toLowerCase() for language independent casing.
- if (HasDefaultCasing(locale)) {
- JSString* str = StringToLowerCase(cx, string);
- if (!str) {
- return false;
- }
+ // Steps 2-3.
+ Rooted<JSLinearString*> requestedLocale(cx);
+ if (!requestedLocales.empty()) {
+ requestedLocale = requestedLocales[0];
+ } else {
+ requestedLocale = cx->global()->globalIntlData().defaultLocale(cx);
+ // Computing the default locale may fail. Propagate the failure instead of
+ // handing a null string to CaseMappingLocale, which expects a non-null
+ // locale.
+ if (!requestedLocale) {
+ return nullptr;
+ }
+ }
- args.rval().setString(str);
- return true;
+ // Steps 4-10.
+ const char* locale = CaseMappingLocale(requestedLocale);
+ if (!locale) {
+ // Call the default case conversion methods for language independent casing.
+ return targetCase == TargetCase::Lower ? StringToLowerCase(cx, string)
+ : StringToUpperCase(cx, string);
}
AutoStableStringChars inputChars(cx);
if (!inputChars.initTwoByte(cx, string)) {
- return false;
+ return nullptr;
}
mozilla::Range<const char16_t> input = inputChars.twoByteRange();
@@ -1013,43 +1015,48 @@ bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
- auto ok = mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer);
+ auto ok =
+ targetCase == TargetCase::Lower
+ ? mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer)
+ : mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer);
if (ok.isErr()) {
intl::ReportInternalError(cx, ok.unwrapErr());
- return false;
- }
-
- JSString* result = buffer.toString(cx);
- if (!result) {
- return false;
+ return nullptr;
}
- args.rval().setString(result);
- return true;
+ return buffer.toString(cx);
}
+#endif
-#else
-
-// When the Intl API is not exposed, String.prototype.toLowerCase is implemented
-// in C++.
static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
"toLocaleLowerCase");
CallArgs args = CallArgsFromVp(argc, vp);
- RootedString str(
+ // Steps 1-2.
+ Rooted<JSString*> str(
cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
if (!str) {
return false;
}
+#if JS_HAS_INTL_API
+ // Step 3.
+ auto* result = TransformCase(cx, str, args.get(0), TargetCase::Lower);
+ if (!result) {
+ return false;
+ }
+
+ args.rval().setString(result);
+ return true;
+#else
/*
* Forcefully ignore the first (or any) argument and return toLowerCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks &&
cx->runtime()->localeCallbacks->localeToLowerCase) {
- RootedValue result(cx);
+ Rooted<Value> result(cx);
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
return false;
}
@@ -1070,10 +1077,9 @@ static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
args.rval().setString(result);
return true;
+#endif
}
-#endif // JS_HAS_INTL_API
-
static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
// U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
// special casing rules, so detect it inline.
@@ -1359,86 +1365,34 @@ static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
return true;
}
-#if JS_HAS_INTL_API
-// String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
-// with core functionality performed by the intrinsic below.
-
-bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
+static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
+ AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
+ "toLocaleUpperCase");
CallArgs args = CallArgsFromVp(argc, vp);
- MOZ_ASSERT(args.length() == 2);
- MOZ_ASSERT(args[0].isString());
- MOZ_ASSERT(args[1].isString());
-
- RootedString string(cx, args[0].toString());
-
- const char* locale = CaseMappingLocale(cx, args[1].toString());
- if (!locale) {
- return false;
- }
-
- // Call String.prototype.toUpperCase() for language independent casing.
- if (HasDefaultCasing(locale)) {
- JSString* str = js::StringToUpperCase(cx, string);
- if (!str) {
- return false;
- }
-
- args.rval().setString(str);
- return true;
- }
-
- AutoStableStringChars inputChars(cx);
- if (!inputChars.initTwoByte(cx, string)) {
- return false;
- }
- mozilla::Range<const char16_t> input = inputChars.twoByteRange();
-
- // Note: maximum case mapping length is three characters, so the result
- // length might be > INT32_MAX. ICU will fail in this case.
- static_assert(JSString::MAX_LENGTH <= INT32_MAX,
- "String length must fit in int32_t for ICU");
-
- static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
- intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
-
- auto ok = mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer);
- if (ok.isErr()) {
- intl::ReportInternalError(cx, ok.unwrapErr());
+ Rooted<JSString*> str(
+ cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
+ if (!str) {
return false;
}
- JSString* result = buffer.toString(cx);
+#if JS_HAS_INTL_API
+ // Step 3.
+ auto* result = TransformCase(cx, str, args.get(0), TargetCase::Upper);
if (!result) {
return false;
}
args.rval().setString(result);
return true;
-}
-
#else
-
-// When the Intl API is not exposed, String.prototype.toUpperCase is implemented
-// in C++.
-static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
- AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype",
- "toLocaleUpperCase");
- CallArgs args = CallArgsFromVp(argc, vp);
-
- RootedString str(
- cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
- if (!str) {
- return false;
- }
-
/*
* Forcefully ignore the first (or any) argument and return toUpperCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks &&
cx->runtime()->localeCallbacks->localeToUpperCase) {
- RootedValue result(cx);
+ Rooted<Value> result(cx);
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
return false;
}
@@ -1459,10 +1413,9 @@ static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
args.rval().setString(result);
return true;
+#endif
}
-#endif // JS_HAS_INTL_API
-
/**
* String.prototype.localeCompare ( that [ , reserved1 [ , reserved2 ] ] )
*
@@ -3892,13 +3845,8 @@ static const JSFunctionSpec string_methods[] = {
JS_INLINABLE_FN("trim", str_trim, 0, 0, StringTrim),
JS_INLINABLE_FN("trimStart", str_trimStart, 0, 0, StringTrimStart),
JS_INLINABLE_FN("trimEnd", str_trimEnd, 0, 0, StringTrimEnd),
-#if JS_HAS_INTL_API
- JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
- JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
-#else
JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
-#endif
JS_FN("localeCompare", str_localeCompare, 1, 0),
JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
#if JS_HAS_INTL_API
diff --git a/js/src/builtin/String.h b/js/src/builtin/String.h
@@ -40,26 +40,6 @@ extern bool str_codePointAt(JSContext* cx, unsigned argc, Value* vp);
extern bool str_endsWith(JSContext* cx, unsigned argc, Value* vp);
-#if JS_HAS_INTL_API
-/**
- * Returns the input string converted to lower case based on the language
- * specific case mappings for the input locale.
- *
- * Usage: lowerCase = intl_toLocaleLowerCase(string, locale)
- */
-[[nodiscard]] extern bool intl_toLocaleLowerCase(JSContext* cx, unsigned argc,
- Value* vp);
-
-/**
- * Returns the input string converted to upper case based on the language
- * specific case mappings for the input locale.
- *
- * Usage: upperCase = intl_toLocaleUpperCase(string, locale)
- */
-[[nodiscard]] extern bool intl_toLocaleUpperCase(JSContext* cx, unsigned argc,
- Value* vp);
-#endif
-
ArrayObject* StringSplitString(JSContext* cx, HandleString str,
HandleString sep, uint32_t limit);
diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js
@@ -841,98 +841,6 @@ function StringIteratorNext() {
}
SetIsInlinableLargeFunction(StringIteratorNext);
-#if JS_HAS_INTL_API
-/**
- * 13.1.2 String.prototype.toLocaleLowerCase ( [ locales ] )
- *
- * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b
- */
-function String_toLocaleLowerCase() {
- // Step 1.
- if (IsNullOrUndefined(this)) {
- ThrowIncompatibleMethod("toLocaleLowerCase", this);
- }
-
- // Step 2.
- var string = ToString(this);
-
- // Handle the common cases (no locales argument or a single string
- // argument) first.
- var locales = ArgumentsLength() ? GetArgument(0) : undefined;
- var requestedLocale;
- if (locales === undefined) {
- // Steps 3, 6.
- requestedLocale = undefined;
- } else if (typeof locales === "string") {
- // Steps 3, 5.
- requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false);
- } else {
- // Step 3.
- var requestedLocales = CanonicalizeLocaleList(locales);
-
- // Steps 4-6.
- requestedLocale = requestedLocales.length ? requestedLocales[0] : undefined;
- }
-
- // Trivial case: When the input is empty, directly return the empty string.
- if (string.length === 0) {
- return "";
- }
-
- if (requestedLocale === undefined) {
- requestedLocale = intl_DefaultLocale();
- }
-
- // Steps 7-16.
- return intl_toLocaleLowerCase(string, requestedLocale);
-}
-
-/**
- * 13.1.3 String.prototype.toLocaleUpperCase ( [ locales ] )
- *
- * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b
- */
-function String_toLocaleUpperCase() {
- // Step 1.
- if (IsNullOrUndefined(this)) {
- ThrowIncompatibleMethod("toLocaleUpperCase", this);
- }
-
- // Step 2.
- var string = ToString(this);
-
- // Handle the common cases (no locales argument or a single string
- // argument) first.
- var locales = ArgumentsLength() ? GetArgument(0) : undefined;
- var requestedLocale;
- if (locales === undefined) {
- // Steps 3, 6.
- requestedLocale = undefined;
- } else if (typeof locales === "string") {
- // Steps 3, 5.
- requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false);
- } else {
- // Step 3.
- var requestedLocales = CanonicalizeLocaleList(locales);
-
- // Steps 4-6.
- requestedLocale = requestedLocales.length ? requestedLocales[0] : undefined;
- }
-
- // Trivial case: When the input is empty, directly return the empty string.
- if (string.length === 0) {
- return "";
- }
-
- if (requestedLocale === undefined) {
- requestedLocale = intl_DefaultLocale();
- }
-
- // Steps 7-16.
- return intl_toLocaleUpperCase(string, requestedLocale);
-}
-#endif // JS_HAS_INTL_API
-
// ES2018 draft rev 8fadde42cf6a9879b4ab0cb6142b31c4ee501667
// 21.1.2.4 String.raw ( template, ...substitutions )
function String_static_raw(callSite /*, ...substitutions*/) {
diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp
@@ -1946,8 +1946,6 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_FN("intl_numberingSystem", intl_numberingSystem, 1, 0),
JS_FN("intl_resolveDateTimeFormatComponents",
intl_resolveDateTimeFormatComponents, 3, 0),
- JS_FN("intl_toLocaleLowerCase", intl_toLocaleLowerCase, 2, 0),
- JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2, 0),
#endif // JS_HAS_INTL_API
// Standard builtins used by self-hosting.