LanguageTag.cpp (8592B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "builtin/intl/LanguageTag.h" 8 9 #include "mozilla/intl/Locale.h" 10 #include "mozilla/Span.h" 11 12 #include "builtin/intl/CommonFunctions.h" 13 #include "builtin/intl/FormatBuffer.h" 14 #include "builtin/intl/StringAsciiChars.h" 15 #include "gc/Tracer.h" 16 #include "vm/JSAtomState.h" 17 #include "vm/JSContext.h" 18 19 #include "vm/JSObject-inl.h" 20 #include "vm/ObjectOperations-inl.h" 21 22 bool js::intl::ParseLocale(JSContext* cx, Handle<JSLinearString*> str, 23 mozilla::intl::Locale& result) { 24 if (StringIsAscii(str)) { 25 intl::StringAsciiChars chars(str); 26 if (!chars.init(cx)) { 27 return false; 28 } 29 30 if (mozilla::intl::LocaleParser::TryParse(chars, result).isOk()) { 31 return true; 32 } 33 } 34 35 if (UniqueChars localeChars = QuoteString(cx, str, '"')) { 36 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, 37 JSMSG_INVALID_LANGUAGE_TAG, localeChars.get()); 38 } 39 return false; 40 } 41 42 bool js::intl::ParseStandaloneLanguageTag( 43 Handle<JSLinearString*> str, mozilla::intl::LanguageSubtag& result) { 44 // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC. 45 JS::AutoSuppressGCAnalysis nogc; 46 47 if (str->hasLatin1Chars()) { 48 if (!mozilla::intl::IsStructurallyValidLanguageTag<Latin1Char>( 49 str->latin1Range(nogc))) { 50 return false; 51 } 52 result.Set<Latin1Char>(str->latin1Range(nogc)); 53 } else { 54 if (!mozilla::intl::IsStructurallyValidLanguageTag<char16_t>( 55 str->twoByteRange(nogc))) { 56 return false; 57 } 58 result.Set<char16_t>(str->twoByteRange(nogc)); 59 } 60 return true; 61 } 62 63 bool js::intl::ParseStandaloneScriptTag(Handle<JSLinearString*> str, 64 mozilla::intl::ScriptSubtag& result) { 65 // Tell the analysis the |IsStructurallyValidScriptTag| function can't GC. 66 JS::AutoSuppressGCAnalysis nogc; 67 68 if (str->hasLatin1Chars()) { 69 if (!mozilla::intl::IsStructurallyValidScriptTag<Latin1Char>( 70 str->latin1Range(nogc))) { 71 return false; 72 } 73 result.Set<Latin1Char>(str->latin1Range(nogc)); 74 } else { 75 if (!mozilla::intl::IsStructurallyValidScriptTag<char16_t>( 76 str->twoByteRange(nogc))) { 77 return false; 78 } 79 result.Set<char16_t>(str->twoByteRange(nogc)); 80 } 81 return true; 82 } 83 84 bool js::intl::ParseStandaloneRegionTag(Handle<JSLinearString*> str, 85 mozilla::intl::RegionSubtag& result) { 86 // Tell the analysis the |IsStructurallyValidRegionTag| function can't GC. 87 JS::AutoSuppressGCAnalysis nogc; 88 89 if (str->hasLatin1Chars()) { 90 if (!mozilla::intl::IsStructurallyValidRegionTag<Latin1Char>( 91 str->latin1Range(nogc))) { 92 return false; 93 } 94 result.Set<Latin1Char>(str->latin1Range(nogc)); 95 } else { 96 if (!mozilla::intl::IsStructurallyValidRegionTag<char16_t>( 97 str->twoByteRange(nogc))) { 98 return false; 99 } 100 result.Set<char16_t>(str->twoByteRange(nogc)); 101 } 102 return true; 103 } 104 105 template <typename CharT> 106 static bool ParseStandaloneVariantTag( 107 mozilla::Span<const CharT> variantSubtags, 108 mozilla::intl::Locale::VariantsVector& result, bool* success) { 109 auto isValidVariantSubtag = [&](auto span) { 110 // Tell the analysis the |IsStructurallyValidVariantTag| function can't GC. 111 JS::AutoSuppressGCAnalysis nogc; 112 return mozilla::intl::IsStructurallyValidVariantTag(span); 113 }; 114 115 size_t start = 0; 116 for (size_t index = 0; index < variantSubtags.size(); index++) { 117 if (variantSubtags[index] == '-') { 118 auto span = variantSubtags.FromTo(start, index); 119 if (!isValidVariantSubtag(span)) { 120 *success = false; 121 return true; 122 } 123 124 if (!result.emplaceBack(span)) { 125 return false; 126 } 127 128 start = index + 1; 129 } 130 } 131 132 // Trailing variant subtag. 133 auto span = variantSubtags.From(start); 134 if (!isValidVariantSubtag(span)) { 135 *success = false; 136 return true; 137 } 138 139 if (!result.emplaceBack(span)) { 140 return false; 141 } 142 143 *success = true; 144 return true; 145 } 146 147 bool js::intl::ParseStandaloneVariantTag( 148 Handle<JSLinearString*> str, mozilla::intl::Locale::VariantsVector& result, 149 bool* success) { 150 JS::AutoCheckCannotGC nogc; 151 return str->hasLatin1Chars() 152 ? ::ParseStandaloneVariantTag( 153 mozilla::Span{str->latin1Range(nogc)}, result, success) 154 : ::ParseStandaloneVariantTag( 155 mozilla::Span{str->twoByteRange(nogc)}, result, success); 156 } 157 158 template <typename CharT> 159 static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) { 160 // Tell the analysis the |std::all_of| function can't GC. 161 JS::AutoSuppressGCAnalysis nogc; 162 163 const CharT* ptr = span.data(); 164 size_t length = span.size(); 165 return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>); 166 } 167 168 static bool IsAsciiLowercaseAlpha(const JSLinearString* str) { 169 JS::AutoCheckCannotGC nogc; 170 if (str->hasLatin1Chars()) { 171 return IsAsciiLowercaseAlpha<JS::Latin1Char>(str->latin1Range(nogc)); 172 } 173 return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc)); 174 } 175 176 template <typename CharT> 177 static bool IsAsciiAlpha(mozilla::Span<const CharT> span) { 178 // Tell the analysis the |std::all_of| function can't GC. 179 JS::AutoSuppressGCAnalysis nogc; 180 181 const CharT* ptr = span.data(); 182 size_t length = span.size(); 183 return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>); 184 } 185 186 static bool IsAsciiAlpha(const JSLinearString* str) { 187 JS::AutoCheckCannotGC nogc; 188 if (str->hasLatin1Chars()) { 189 return IsAsciiAlpha<JS::Latin1Char>(str->latin1Range(nogc)); 190 } 191 return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc)); 192 } 193 194 JS::Result<JSLinearString*> js::intl::ParseStandaloneISO639LanguageTag( 195 JSContext* cx, Handle<JSLinearString*> str) { 196 // ISO-639 language codes contain either two or three characters. 197 size_t length = str->length(); 198 if (length != 2 && length != 3) { 199 return nullptr; 200 } 201 202 // We can directly the return the input below if it's in the correct case. 203 bool isLowerCase = IsAsciiLowercaseAlpha(str); 204 if (!isLowerCase) { 205 // Must be an ASCII alpha string. 206 if (!IsAsciiAlpha(str)) { 207 return nullptr; 208 } 209 } 210 211 mozilla::intl::LanguageSubtag languageTag; 212 if (str->hasLatin1Chars()) { 213 JS::AutoCheckCannotGC nogc; 214 languageTag.Set<Latin1Char>(str->latin1Range(nogc)); 215 } else { 216 JS::AutoCheckCannotGC nogc; 217 languageTag.Set<char16_t>(str->twoByteRange(nogc)); 218 } 219 220 if (!isLowerCase) { 221 // The language subtag is canonicalized to lower case. 222 languageTag.ToLowerCase(); 223 } 224 225 // Reject the input if the canonical tag contains more than just a single 226 // language subtag. 227 if (mozilla::intl::Locale::ComplexLanguageMapping(languageTag)) { 228 return nullptr; 229 } 230 231 // Take care to replace deprecated subtags with their preferred values. 232 JSLinearString* result; 233 if (mozilla::intl::Locale::LanguageMapping(languageTag) || !isLowerCase) { 234 result = NewStringCopy<CanGC>(cx, languageTag.Span()); 235 } else { 236 result = str; 237 } 238 if (!result) { 239 return cx->alreadyReportedOOM(); 240 } 241 return result; 242 } 243 244 JS::UniqueChars js::intl::FormatLocale( 245 JSContext* cx, JS::Handle<JSObject*> internals, 246 JS::HandleVector<UnicodeExtensionKeyword> keywords) { 247 RootedValue value(cx); 248 if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) { 249 return nullptr; 250 } 251 252 mozilla::intl::Locale tag; 253 { 254 Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx)); 255 if (!locale) { 256 return nullptr; 257 } 258 259 if (!ParseLocale(cx, locale, tag)) { 260 return nullptr; 261 } 262 } 263 264 // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of 265 // the Unicode extension subtag. We're then relying on ICU to follow RFC 266 // 6067, which states that any trailing keywords using the same key 267 // should be ignored. 268 if (!ApplyUnicodeExtensionToTag(cx, tag, keywords)) { 269 return nullptr; 270 } 271 272 FormatBuffer<char> buffer(cx); 273 if (auto result = tag.ToString(buffer); result.isErr()) { 274 ReportInternalError(cx, result.unwrapErr()); 275 return nullptr; 276 } 277 return buffer.extractStringZ(); 278 } 279 280 void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) { 281 TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type"); 282 }