tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 149bb3d3bd6e1f7229760fa153572a3ebb27c76d
parent 108d654abff40871cf52b549469502fc6a985e1d
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 13 Oct 2025 12:55:24 +0000

Bug 1990248 - Part 3: Port locale-sensitive case conversion methods to C++. r=spidermonkey-reviewers,jandem

Differential Revision: https://phabricator.services.mozilla.com/D265825

Diffstat:
M js/src/builtin/String.cpp | 180 ++++++++++++++++++++++++++++---------------------------------------------------
M js/src/builtin/String.h | 20 --------------------
M js/src/builtin/String.js | 92 -------------------------------------------------------------------------------
M js/src/vm/SelfHosting.cpp | 2 --
4 files changed, 64 insertions(+), 230 deletions(-)

diff --git a/js/src/builtin/String.cpp b/js/src/builtin/String.cpp @@ -33,6 +33,7 @@ # include "builtin/intl/CommonFunctions.h" # include "builtin/intl/FormatBuffer.h" # include "builtin/intl/GlobalIntlData.h" +# include "builtin/intl/Locale.h" #endif #include "builtin/RegExp.h" #include "gc/GC.h" @@ -943,15 +944,7 @@ static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) { } #if JS_HAS_INTL_API -// String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed, -// with core functionality performed by the intrinsic below. - -static const char* CaseMappingLocale(JSContext* cx, JSString* str) { - JSLinearString* locale = str->ensureLinear(cx); - if (!locale) { - return nullptr; - } - +static const char* CaseMappingLocale(JSLinearString* locale) { MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag"); // Lithuanian, Turkish, and Azeri have language dependent case mappings. @@ -969,38 +962,47 @@ static const char* CaseMappingLocale(JSContext* cx, JSString* str) { } } - return ""; // ICU root locale + return nullptr; } -static bool HasDefaultCasing(const char* locale) { return !strcmp(locale, ""); } +enum class TargetCase { Lower, Upper }; -bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { - CallArgs args = CallArgsFromVp(argc, vp); - MOZ_ASSERT(args.length() == 2); - MOZ_ASSERT(args[0].isString()); - MOZ_ASSERT(args[1].isString()); - - RootedString string(cx, args[0].toString()); +/** + * TransformCase ( S, locales, targetCase ) + */ +static JSLinearString* TransformCase(JSContext* cx, Handle<JSString*> string, + Handle<Value> locales, + TargetCase targetCase) { + // Step 1. + Rooted<intl::LocalesList> requestedLocales(cx, cx); + if (!intl::CanonicalizeLocaleList(cx, locales, &requestedLocales)) { + return nullptr; + } - const char* locale = CaseMappingLocale(cx, args[1].toString()); - if (!locale) { - return false; + // Trivial case: When the input is empty, directly return the empty string. 
+ if (string->empty()) { + return cx->emptyString(); } - // Call String.prototype.toLowerCase() for language independent casing. - if (HasDefaultCasing(locale)) { - JSString* str = StringToLowerCase(cx, string); - if (!str) { - return false; - } + // Steps 2-3. + Rooted<JSLinearString*> requestedLocale(cx); + if (!requestedLocales.empty()) { + requestedLocale = requestedLocales[0]; + } else { + requestedLocale = cx->global()->globalIntlData().defaultLocale(cx); + } - args.rval().setString(str); - return true; + // Steps 4-10. + const char* locale = CaseMappingLocale(requestedLocale); + if (!locale) { + // Call the default case conversion methods for language independent casing. + return targetCase == TargetCase::Lower ? StringToLowerCase(cx, string) + : StringToUpperCase(cx, string); } AutoStableStringChars inputChars(cx); if (!inputChars.initTwoByte(cx, string)) { - return false; + return nullptr; } mozilla::Range<const char16_t> input = inputChars.twoByteRange(); @@ -1013,43 +1015,48 @@ bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx); - auto ok = mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer); + auto ok = + targetCase == TargetCase::Lower + ? mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer) + : mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer); if (ok.isErr()) { intl::ReportInternalError(cx, ok.unwrapErr()); - return false; - } - - JSString* result = buffer.toString(cx); - if (!result) { - return false; + return nullptr; } - args.rval().setString(result); - return true; + return buffer.toString(cx); } +#endif -#else - -// When the Intl API is not exposed, String.prototype.toLowerCase is implemented -// in C++. 
static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", "toLocaleLowerCase"); CallArgs args = CallArgsFromVp(argc, vp); - RootedString str( + // Steps 1-2. + Rooted<JSString*> str( cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv())); if (!str) { return false; } +#if JS_HAS_INTL_API + // Step 3. + auto* result = TransformCase(cx, str, args.get(0), TargetCase::Lower); + if (!result) { + return false; + } + + args.rval().setString(result); + return true; +#else /* * Forcefully ignore the first (or any) argument and return toLowerCase(), * ECMA has reserved that argument, presumably for defining the locale. */ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) { - RootedValue result(cx); + Rooted<Value> result(cx); if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) { return false; } @@ -1070,10 +1077,9 @@ static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { args.rval().setString(result); return true; +#endif } -#endif // JS_HAS_INTL_API - static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) { // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with // special casing rules, so detect it inline. @@ -1359,86 +1365,34 @@ static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) { return true; } -#if JS_HAS_INTL_API -// String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed, -// with core functionality performed by the intrinsic below. 
- -bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { +static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { + AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", + "toLocaleUpperCase"); CallArgs args = CallArgsFromVp(argc, vp); - MOZ_ASSERT(args.length() == 2); - MOZ_ASSERT(args[0].isString()); - MOZ_ASSERT(args[1].isString()); - - RootedString string(cx, args[0].toString()); - - const char* locale = CaseMappingLocale(cx, args[1].toString()); - if (!locale) { - return false; - } - - // Call String.prototype.toUpperCase() for language independent casing. - if (HasDefaultCasing(locale)) { - JSString* str = js::StringToUpperCase(cx, string); - if (!str) { - return false; - } - - args.rval().setString(str); - return true; - } - - AutoStableStringChars inputChars(cx); - if (!inputChars.initTwoByte(cx, string)) { - return false; - } - mozilla::Range<const char16_t> input = inputChars.twoByteRange(); - - // Note: maximum case mapping length is three characters, so the result - // length might be > INT32_MAX. ICU will fail in this case. - static_assert(JSString::MAX_LENGTH <= INT32_MAX, - "String length must fit in int32_t for ICU"); - - static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE; - intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx); - - auto ok = mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer); - if (ok.isErr()) { - intl::ReportInternalError(cx, ok.unwrapErr()); + Rooted<JSString*> str( + cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv())); + if (!str) { return false; } - JSString* result = buffer.toString(cx); +#if JS_HAS_INTL_API + // Step 3. + auto* result = TransformCase(cx, str, args.get(0), TargetCase::Upper); if (!result) { return false; } args.rval().setString(result); return true; -} - #else - -// When the Intl API is not exposed, String.prototype.toUpperCase is implemented -// in C++. 
-static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { - AutoJSMethodProfilerEntry pseudoFrame(cx, "String.prototype", - "toLocaleUpperCase"); - CallArgs args = CallArgsFromVp(argc, vp); - - RootedString str( - cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv())); - if (!str) { - return false; - } - /* * Forcefully ignore the first (or any) argument and return toUpperCase(), * ECMA has reserved that argument, presumably for defining the locale. */ if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) { - RootedValue result(cx); + Rooted<Value> result(cx); if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) { return false; } @@ -1459,10 +1413,9 @@ static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { args.rval().setString(result); return true; +#endif } -#endif // JS_HAS_INTL_API - /** * String.prototype.localeCompare ( that [ , reserved1 [ , reserved2 ] ] ) * @@ -3892,13 +3845,8 @@ static const JSFunctionSpec string_methods[] = { JS_INLINABLE_FN("trim", str_trim, 0, 0, StringTrim), JS_INLINABLE_FN("trimStart", str_trimStart, 0, 0, StringTrimStart), JS_INLINABLE_FN("trimEnd", str_trimEnd, 0, 0, StringTrimEnd), -#if JS_HAS_INTL_API - JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0), - JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0), -#else JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0), JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0), -#endif JS_FN("localeCompare", str_localeCompare, 1, 0), JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0), #if JS_HAS_INTL_API diff --git a/js/src/builtin/String.h b/js/src/builtin/String.h @@ -40,26 +40,6 @@ extern bool str_codePointAt(JSContext* cx, unsigned argc, Value* vp); extern bool str_endsWith(JSContext* cx, unsigned argc, Value* vp); -#if JS_HAS_INTL_API -/** - * Returns the input string converted to lower case based on the language - * 
specific case mappings for the input locale. - * - * Usage: lowerCase = intl_toLocaleLowerCase(string, locale) - */ -[[nodiscard]] extern bool intl_toLocaleLowerCase(JSContext* cx, unsigned argc, - Value* vp); - -/** - * Returns the input string converted to upper case based on the language - * specific case mappings for the input locale. - * - * Usage: upperCase = intl_toLocaleUpperCase(string, locale) - */ -[[nodiscard]] extern bool intl_toLocaleUpperCase(JSContext* cx, unsigned argc, - Value* vp); -#endif - ArrayObject* StringSplitString(JSContext* cx, HandleString str, HandleString sep, uint32_t limit); diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js @@ -841,98 +841,6 @@ function StringIteratorNext() { } SetIsInlinableLargeFunction(StringIteratorNext); -#if JS_HAS_INTL_API -/** - * 13.1.2 String.prototype.toLocaleLowerCase ( [ locales ] ) - * - * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b - */ -function String_toLocaleLowerCase() { - // Step 1. - if (IsNullOrUndefined(this)) { - ThrowIncompatibleMethod("toLocaleLowerCase", this); - } - - // Step 2. - var string = ToString(this); - - // Handle the common cases (no locales argument or a single string - // argument) first. - var locales = ArgumentsLength() ? GetArgument(0) : undefined; - var requestedLocale; - if (locales === undefined) { - // Steps 3, 6. - requestedLocale = undefined; - } else if (typeof locales === "string") { - // Steps 3, 5. - requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false); - } else { - // Step 3. - var requestedLocales = CanonicalizeLocaleList(locales); - - // Steps 4-6. - requestedLocale = requestedLocales.length ? requestedLocales[0] : undefined; - } - - // Trivial case: When the input is empty, directly return the empty string. - if (string.length === 0) { - return ""; - } - - if (requestedLocale === undefined) { - requestedLocale = intl_DefaultLocale(); - } - - // Steps 7-16. 
- return intl_toLocaleLowerCase(string, requestedLocale); -} - -/** - * 13.1.3 String.prototype.toLocaleUpperCase ( [ locales ] ) - * - * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b - */ -function String_toLocaleUpperCase() { - // Step 1. - if (IsNullOrUndefined(this)) { - ThrowIncompatibleMethod("toLocaleUpperCase", this); - } - - // Step 2. - var string = ToString(this); - - // Handle the common cases (no locales argument or a single string - // argument) first. - var locales = ArgumentsLength() ? GetArgument(0) : undefined; - var requestedLocale; - if (locales === undefined) { - // Steps 3, 6. - requestedLocale = undefined; - } else if (typeof locales === "string") { - // Steps 3, 5. - requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false); - } else { - // Step 3. - var requestedLocales = CanonicalizeLocaleList(locales); - - // Steps 4-6. - requestedLocale = requestedLocales.length ? requestedLocales[0] : undefined; - } - - // Trivial case: When the input is empty, directly return the empty string. - if (string.length === 0) { - return ""; - } - - if (requestedLocale === undefined) { - requestedLocale = intl_DefaultLocale(); - } - - // Steps 7-16. - return intl_toLocaleUpperCase(string, requestedLocale); -} -#endif // JS_HAS_INTL_API - // ES2018 draft rev 8fadde42cf6a9879b4ab0cb6142b31c4ee501667 // 21.1.2.4 String.raw ( template, ...substitutions ) function String_static_raw(callSite /*, ...substitutions*/) { diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp @@ -1946,8 +1946,6 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("intl_numberingSystem", intl_numberingSystem, 1, 0), JS_FN("intl_resolveDateTimeFormatComponents", intl_resolveDateTimeFormatComponents, 3, 0), - JS_FN("intl_toLocaleLowerCase", intl_toLocaleLowerCase, 2, 0), - JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2, 0), #endif // JS_HAS_INTL_API // Standard builtins used by self-hosting.