LocaleNegotiation.cpp (16125B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "builtin/intl/LocaleNegotiation.h" 8 9 #include "mozilla/Assertions.h" 10 #include "mozilla/intl/Locale.h" 11 12 #include <algorithm> 13 #include <iterator> 14 #include <stddef.h> 15 16 #include "builtin/Array.h" 17 #include "builtin/intl/CommonFunctions.h" 18 #include "builtin/intl/FormatBuffer.h" 19 #include "builtin/intl/SharedIntlData.h" 20 #include "builtin/intl/StringAsciiChars.h" 21 #include "js/Conversions.h" 22 #include "js/Result.h" 23 #include "vm/ArrayObject.h" 24 #include "vm/GlobalObject.h" 25 #include "vm/JSContext.h" 26 #include "vm/Realm.h" 27 #include "vm/StringType.h" 28 29 #include "vm/NativeObject-inl.h" 30 #include "vm/ObjectOperations-inl.h" 31 32 using namespace js; 33 using namespace js::intl; 34 35 static bool AssertCanonicalLocaleWithoutUnicodeExtension( 36 JSContext* cx, Handle<JSLinearString*> locale) { 37 #ifdef DEBUG 38 MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only"); 39 40 // |locale| is a structurally valid language tag. 41 mozilla::intl::Locale tag; 42 43 using ParserError = mozilla::intl::LocaleParser::ParserError; 44 mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok(); 45 { 46 intl::StringAsciiChars chars(locale); 47 if (!chars.init(cx)) { 48 return false; 49 } 50 51 parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag); 52 } 53 54 if (parse_result.isErr()) { 55 MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory, 56 "locale is a structurally valid language tag"); 57 58 intl::ReportInternalError(cx); 59 return false; 60 } 61 62 MOZ_ASSERT(!tag.GetUnicodeExtension(), 63 "locale must contain no Unicode extensions"); 64 65 if (auto result = tag.Canonicalize(); result.isErr()) { 66 MOZ_ASSERT(result.unwrapErr() != 67 mozilla::intl::Locale::CanonicalizationError::DuplicateVariant); 68 intl::ReportInternalError(cx); 69 return false; 70 } 71 72 intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); 73 if (auto result = tag.ToString(buffer); result.isErr()) { 74 intl::ReportInternalError(cx, result.unwrapErr()); 75 return false; 76 } 77 78 MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()), 79 "locale is a canonicalized language tag"); 80 #endif 81 return true; 82 } 83 84 static bool SameOrParentLocale(const JSLinearString* locale, 85 const JSLinearString* otherLocale) { 86 // Return true if |locale| is the same locale as |otherLocale|. 87 if (locale->length() == otherLocale->length()) { 88 return EqualStrings(locale, otherLocale); 89 } 90 91 // Also return true if |locale| is the parent locale of |otherLocale|. 92 if (locale->length() < otherLocale->length()) { 93 return HasSubstringAt(otherLocale, locale, 0) && 94 otherLocale->latin1OrTwoByteChar(locale->length()) == '-'; 95 } 96 97 return false; 98 } 99 100 /** 101 * 9.2.2 BestAvailableLocale ( availableLocales, locale ) 102 * 103 * Compares a BCP 47 language tag against the locales in availableLocales and 104 * returns the best available match. Uses the fallback mechanism of RFC 4647, 105 * section 3.4. 106 * 107 * Spec: ECMAScript Internationalization API Specification, 9.2.2. 108 * Spec: RFC 4647, section 3.4. 109 */ 110 static JS::Result<JSLinearString*> BestAvailableLocale( 111 JSContext* cx, AvailableLocaleKind availableLocales, 112 Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale) { 113 // In the spec, [[availableLocales]] is formally a list of all available 114 // locales. But in our implementation, it's an *incomplete* list, not 115 // necessarily including the default locale (and all locales implied by it, 116 // e.g. "de" implied by "de-CH"), if that locale isn't in every 117 // [[availableLocales]] list (because that locale is supported through 118 // fallback, e.g. "de-CH" supported through "de"). 119 // 120 // If we're considering the default locale, augment the spec loop with 121 // additional checks to also test whether the current prefix is a prefix of 122 // the default locale. 123 124 intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); 125 126 auto findLast = [](const auto* chars, size_t length) { 127 auto rbegin = std::make_reverse_iterator(chars + length); 128 auto rend = std::make_reverse_iterator(chars); 129 auto p = std::find(rbegin, rend, '-'); 130 131 // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you 132 // find easier to reason about when using reserve iterators. 133 ptrdiff_t r = std::distance(chars, p.base()); 134 MOZ_ASSERT(r == std::distance(p, rend)); 135 136 // But always subtract one to convert from the reverse iterator result to 137 // the correspoding forward iterator value, because reserve iterators point 138 // to one element past the forward iterator value. 139 return r - 1; 140 }; 141 142 if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) { 143 return cx->alreadyReportedError(); 144 } 145 146 // Step 1. 147 Rooted<JSLinearString*> candidate(cx, locale); 148 149 // Step 2. 150 while (true) { 151 // Step 2.a. 152 bool supported = false; 153 if (!sharedIntlData.isAvailableLocale(cx, availableLocales, candidate, 154 &supported)) { 155 return cx->alreadyReportedError(); 156 } 157 if (supported) { 158 return candidate.get(); 159 } 160 161 if (defaultLocale && SameOrParentLocale(candidate, defaultLocale)) { 162 return candidate.get(); 163 } 164 165 // Step 2.b. 166 ptrdiff_t pos; 167 if (candidate->hasLatin1Chars()) { 168 JS::AutoCheckCannotGC nogc; 169 pos = findLast(candidate->latin1Chars(nogc), candidate->length()); 170 } else { 171 JS::AutoCheckCannotGC nogc; 172 pos = findLast(candidate->twoByteChars(nogc), candidate->length()); 173 } 174 175 if (pos < 0) { 176 return nullptr; 177 } 178 179 // Step 2.c. 180 size_t length = size_t(pos); 181 if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') { 182 length -= 2; 183 } 184 185 // Step 2.d. 186 candidate = NewDependentString(cx, candidate, 0, length); 187 if (!candidate) { 188 return cx->alreadyReportedError(); 189 } 190 } 191 } 192 193 // 9.2.2 BestAvailableLocale ( availableLocales, locale ) 194 // 195 // Carries an additional third argument in our implementation to provide the 196 // default locale. See the doc-comment in the header file. 197 bool js::intl::BestAvailableLocale(JSContext* cx, 198 AvailableLocaleKind availableLocales, 199 Handle<JSLinearString*> locale, 200 Handle<JSLinearString*> defaultLocale, 201 MutableHandle<JSLinearString*> result) { 202 JSLinearString* res; 203 JS_TRY_VAR_OR_RETURN_FALSE( 204 cx, res, 205 BestAvailableLocale(cx, availableLocales, locale, defaultLocale)); 206 if (res) { 207 result.set(res); 208 } else { 209 result.set(nullptr); 210 } 211 return true; 212 } 213 214 template <typename CharT> 215 static size_t BaseNameLength(mozilla::Range<const CharT> locale) { 216 // Search for the start of the first singleton subtag. 217 for (size_t i = 0; i < locale.length(); i++) { 218 if (locale[i] == '-') { 219 MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale"); 220 if (locale[i + 2] == '-') { 221 return i; 222 } 223 } 224 } 225 return locale.length(); 226 } 227 228 static size_t BaseNameLength(JSLinearString* locale) { 229 JS::AutoCheckCannotGC nogc; 230 if (locale->hasLatin1Chars()) { 231 return BaseNameLength(locale->latin1Range(nogc)); 232 } 233 return BaseNameLength(locale->twoByteRange(nogc)); 234 } 235 236 /** 237 * Returns the subset of requestedLocales for which availableLocales has a 238 * matching (possibly fallback) locale. Locales appear in the same order in the 239 * returned list as in the input list. 240 * 241 * Spec: ECMAScript Internationalization API Specification, 9.2.7. 242 * Spec: ECMAScript Internationalization API Specification, 9.2.8. 243 */ 244 static bool LookupSupportedLocales( 245 JSContext* cx, AvailableLocaleKind availableLocales, 246 Handle<LocalesList> requestedLocales, 247 MutableHandle<LocalesList> supportedLocales) { 248 // Step 1. 249 MOZ_ASSERT(supportedLocales.empty()); 250 251 Rooted<JSLinearString*> defaultLocale( 252 cx, cx->global()->globalIntlData().defaultLocale(cx)); 253 if (!defaultLocale) { 254 return false; 255 } 256 257 // Step 2. 258 Rooted<JSLinearString*> noExtensionsLocale(cx); 259 Rooted<JSLinearString*> availableLocale(cx); 260 for (size_t i = 0; i < requestedLocales.length(); i++) { 261 auto locale = requestedLocales[i]; 262 263 // Step 2.a. 264 // 265 // Use the base name to ignore any extension sequences. 266 noExtensionsLocale = 267 NewDependentString(cx, locale, 0, BaseNameLength(locale)); 268 if (!noExtensionsLocale) { 269 return false; 270 } 271 272 // Step 2.b. 273 JSLinearString* availableLocale; 274 JS_TRY_VAR_OR_RETURN_FALSE( 275 cx, availableLocale, 276 BestAvailableLocale(cx, availableLocales, noExtensionsLocale, 277 defaultLocale)); 278 279 // Step 2.c. 280 if (availableLocale) { 281 if (!supportedLocales.append(locale)) { 282 return false; 283 } 284 } 285 } 286 287 // Step 3. 288 return true; 289 } 290 291 /** 292 * Returns the subset of requestedLocales for which availableLocales has a 293 * matching (possibly fallback) locale. Locales appear in the same order in the 294 * returned list as in the input list. 295 * 296 * Spec: ECMAScript Internationalization API Specification, 9.2.9. 297 */ 298 static bool SupportedLocales(JSContext* cx, 299 AvailableLocaleKind availableLocales, 300 Handle<LocalesList> requestedLocales, 301 Handle<Value> options, 302 MutableHandle<LocalesList> supportedLocales) { 303 // Step 1. 304 if (!options.isUndefined()) { 305 // Step 1.a. 306 Rooted<JSObject*> obj(cx, ToObject(cx, options)); 307 if (!obj) { 308 return false; 309 } 310 311 // Step 1.b. 312 Rooted<Value> localeMatcher(cx); 313 if (!GetProperty(cx, obj, obj, cx->names().localeMatcher, &localeMatcher)) { 314 return false; 315 } 316 317 if (!localeMatcher.isUndefined()) { 318 JSString* str = ToString(cx, localeMatcher); 319 if (!str) { 320 return false; 321 } 322 323 JSLinearString* linear = str->ensureLinear(cx); 324 if (!linear) { 325 return false; 326 } 327 328 if (!StringEqualsLiteral(linear, "lookup") && 329 !StringEqualsLiteral(linear, "best fit")) { 330 if (auto chars = QuoteString(cx, linear)) { 331 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 332 JSMSG_INVALID_LOCALE_MATCHER, chars.get()); 333 } 334 return false; 335 } 336 } 337 } 338 339 // Steps 2-5. 340 // 341 // We don't yet support anything better than the lookup matcher. 342 return LookupSupportedLocales(cx, availableLocales, requestedLocales, 343 supportedLocales); 344 } 345 346 ArrayObject* js::intl::LocalesListToArray(JSContext* cx, 347 Handle<LocalesList> locales) { 348 auto* array = NewDenseFullyAllocatedArray(cx, locales.length()); 349 if (!array) { 350 return nullptr; 351 } 352 array->setDenseInitializedLength(locales.length()); 353 354 for (size_t i = 0; i < locales.length(); i++) { 355 array->initDenseElement(i, StringValue(locales[i])); 356 } 357 return array; 358 } 359 360 ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx, 361 AvailableLocaleKind availableLocales, 362 Handle<Value> locales, 363 Handle<Value> options) { 364 Rooted<LocalesList> requestedLocales(cx, cx); 365 if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) { 366 return nullptr; 367 } 368 369 Rooted<LocalesList> supportedLocales(cx, cx); 370 if (!SupportedLocales(cx, availableLocales, requestedLocales, options, 371 &supportedLocales)) { 372 return nullptr; 373 } 374 375 return LocalesListToArray(cx, supportedLocales); 376 } 377 378 JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) { 379 const char* locale = cx->realm()->getLocale(); 380 if (!locale) { 381 ReportOutOfMemory(cx); 382 return nullptr; 383 } 384 385 auto span = mozilla::MakeStringSpan(locale); 386 387 mozilla::intl::Locale tag; 388 bool canParseLocale = 389 mozilla::intl::LocaleParser::TryParse(span, tag).isOk() && 390 tag.Canonicalize().isOk(); 391 392 Rooted<JSLinearString*> candidate(cx); 393 if (!canParseLocale) { 394 candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); 395 if (!candidate) { 396 return nullptr; 397 } 398 } else { 399 // The default locale must be in [[AvailableLocales]], and that list must 400 // not contain any locales with Unicode extension sequences, so remove any 401 // present in the candidate. 402 tag.ClearUnicodeExtension(); 403 404 intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx); 405 if (auto result = tag.ToString(buffer); result.isErr()) { 406 intl::ReportInternalError(cx, result.unwrapErr()); 407 return nullptr; 408 } 409 410 candidate = buffer.toAsciiString(cx); 411 if (!candidate) { 412 return nullptr; 413 } 414 415 // Certain old-style language tags lack a script code, but in current 416 // usage they *would* include a script code. Map these over to modern 417 // forms. 418 for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) { 419 const char* oldStyle = mapping.oldStyle; 420 const char* modernStyle = mapping.modernStyle; 421 422 if (StringEqualsAscii(candidate, oldStyle)) { 423 candidate = NewStringCopyZ<CanGC>(cx, modernStyle); 424 if (!candidate) { 425 return nullptr; 426 } 427 break; 428 } 429 } 430 } 431 432 // 9.1 Internal slots of Service Constructors 433 // 434 // - [[AvailableLocales]] is a List [...]. The list must include the value 435 // returned by the DefaultLocale abstract operation (6.2.4), [...]. 436 // 437 // That implies we must ignore any candidate which isn't supported by all 438 // Intl service constructors. 439 440 Rooted<JSLinearString*> supportedCollator(cx); 441 JS_TRY_VAR_OR_RETURN_NULL( 442 cx, supportedCollator, 443 BestAvailableLocale(cx, AvailableLocaleKind::Collator, candidate, 444 nullptr)); 445 446 Rooted<JSLinearString*> supportedDateTimeFormat(cx); 447 JS_TRY_VAR_OR_RETURN_NULL( 448 cx, supportedDateTimeFormat, 449 BestAvailableLocale(cx, AvailableLocaleKind::DateTimeFormat, candidate, 450 nullptr)); 451 452 #ifdef DEBUG 453 // Note: We don't test the supported locales of the remaining Intl service 454 // constructors, because the set of supported locales is exactly equal to 455 // the set of supported locales of Intl.DateTimeFormat. 456 for (auto kind : { 457 AvailableLocaleKind::DisplayNames, 458 AvailableLocaleKind::DurationFormat, 459 AvailableLocaleKind::ListFormat, 460 AvailableLocaleKind::NumberFormat, 461 AvailableLocaleKind::PluralRules, 462 AvailableLocaleKind::RelativeTimeFormat, 463 AvailableLocaleKind::Segmenter, 464 }) { 465 JSLinearString* supported; 466 JS_TRY_VAR_OR_RETURN_NULL( 467 cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr)); 468 469 MOZ_ASSERT(!!supported == !!supportedDateTimeFormat); 470 MOZ_ASSERT_IF(supported, EqualStrings(supported, supportedDateTimeFormat)); 471 } 472 #endif 473 474 // Accept the candidate locale if it is supported by all Intl service 475 // constructors. 476 if (supportedCollator && supportedDateTimeFormat) { 477 // Use the actually supported locale instead of the candidate locale. For 478 // example when the candidate locale "en-US-posix" is supported through 479 // "en-US", use "en-US" as the default locale. 480 // 481 // Also prefer the supported locale with more subtags. For example when 482 // requesting "de-CH" and Intl.DateTimeFormat supports "de-CH", but 483 // Intl.Collator only "de", still return "de-CH" as the result. 484 if (SameOrParentLocale(supportedCollator, supportedDateTimeFormat)) { 485 return supportedDateTimeFormat; 486 } 487 return supportedCollator; 488 } 489 490 // Return the last ditch locale if the candidate locale isn't supported. 491 return NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale()); 492 }