nsLanguageAtomService.cpp (8750B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "nsLanguageAtomService.h" 7 8 #include "mozilla/Encoding.h" 9 #include "mozilla/intl/Locale.h" 10 #include "mozilla/intl/OSPreferences.h" 11 #include "MainThreadUtils.h" 12 #include "nsGkAtoms.h" 13 #include "nsUConvPropertySearch.h" 14 #include "nsUnicharUtils.h" 15 #include "MainThreadUtils.h" 16 17 #include <mutex> // for call_once 18 19 using namespace mozilla; 20 using mozilla::intl::OSPreferences; 21 22 // List of mozilla internal x-* tags that map to themselves (see bug 256257) 23 static constexpr nsStaticAtom* kLangGroups[] = { 24 // This list must be sorted! 25 nsGkAtoms::x_armn, nsGkAtoms::x_cyrillic, nsGkAtoms::x_devanagari, 26 nsGkAtoms::x_geor, nsGkAtoms::x_math, nsGkAtoms::x_tamil, 27 nsGkAtoms::Unicode, nsGkAtoms::x_western 28 // These self-mappings are not necessary unless somebody use them to specify 29 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257) 30 // x-beng=x-beng 31 // x-cans=x-cans 32 // x-ethi=x-ethi 33 // x-guru=x-guru 34 // x-gujr=x-gujr 35 // x-khmr=x-khmr 36 // x-mlym=x-mlym 37 }; 38 39 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups. 40 static constexpr struct { 41 const char* mTag; 42 nsStaticAtom* mAtom; 43 } kScriptLangGroup[] = { 44 // This list must be sorted by script code! 45 {"Arab", nsGkAtoms::ar}, 46 {"Armn", nsGkAtoms::x_armn}, 47 {"Beng", nsGkAtoms::x_beng}, 48 {"Cans", nsGkAtoms::x_cans}, 49 {"Cyrl", nsGkAtoms::x_cyrillic}, 50 {"Deva", nsGkAtoms::x_devanagari}, 51 {"Ethi", nsGkAtoms::x_ethi}, 52 {"Geok", nsGkAtoms::x_geor}, 53 {"Geor", nsGkAtoms::x_geor}, 54 {"Grek", nsGkAtoms::el}, 55 {"Gujr", nsGkAtoms::x_gujr}, 56 {"Guru", nsGkAtoms::x_guru}, 57 {"Hang", nsGkAtoms::ko}, 58 // Hani is not mapped to a specific langGroup, we prefer to look at the 59 // primary language subtag in this case 60 {"Hans", nsGkAtoms::Chinese}, 61 // Hant is special-cased in code 62 // Hant=zh-HK 63 // Hant=zh-TW 64 {"Hebr", nsGkAtoms::he}, 65 {"Hira", nsGkAtoms::Japanese}, 66 {"Jpan", nsGkAtoms::Japanese}, 67 {"Kana", nsGkAtoms::Japanese}, 68 {"Khmr", nsGkAtoms::x_khmr}, 69 {"Knda", nsGkAtoms::x_knda}, 70 {"Kore", nsGkAtoms::ko}, 71 {"Latn", nsGkAtoms::x_western}, 72 {"Mlym", nsGkAtoms::x_mlym}, 73 {"Orya", nsGkAtoms::x_orya}, 74 {"Sinh", nsGkAtoms::x_sinh}, 75 {"Taml", nsGkAtoms::x_tamil}, 76 {"Telu", nsGkAtoms::x_telu}, 77 {"Thai", nsGkAtoms::th}, 78 {"Tibt", nsGkAtoms::x_tibt}}; 79 80 StaticAutoPtr<nsLanguageAtomService> nsLanguageAtomService::sLangAtomService; 81 82 // static 83 nsLanguageAtomService* nsLanguageAtomService::GetService() { 84 static std::once_flag sOnce; 85 86 std::call_once(sOnce, 87 []() { sLangAtomService = new nsLanguageAtomService(); }); 88 89 return sLangAtomService.get(); 90 } 91 92 // static 93 void nsLanguageAtomService::Shutdown() { 94 // We only expect to be shut down by the main thread. 95 MOZ_ASSERT(NS_IsMainThread()); 96 sLangAtomService = nullptr; 97 } 98 99 nsStaticAtom* nsLanguageAtomService::LookupLanguage( 100 const nsACString& aLanguage) { 101 nsAutoCString lowered(aLanguage); 102 ToLowerCase(lowered); 103 104 RefPtr<nsAtom> lang = NS_Atomize(lowered); 105 return GetLanguageGroup(lang); 106 } 107 108 nsAtom* nsLanguageAtomService::GetLocaleLanguage() { 109 { 110 AutoReadLock lock(mLock); 111 if (mLocaleLanguage) { 112 return mLocaleLanguage; 113 } 114 } 115 116 AutoWriteLock lock(mLock); 117 if (!mLocaleLanguage) { 118 AutoTArray<nsCString, 10> regionalPrefsLocales; 119 // XXX Are the OSPreferences calls here safe to call from any thread? 120 // In practice GetLocaleLanguage will be called early on the main thread 121 // (e.g. by nsFontCache), so mLocaleLanguage should be safely initialized 122 // before we try to use it from worker threads, but that may not be fully 123 // guaranteed. 124 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales( 125 regionalPrefsLocales))) { 126 // use lowercase for all language atoms 127 ToLowerCase(regionalPrefsLocales[0]); 128 mLocaleLanguage = NS_Atomize(regionalPrefsLocales[0]); 129 } else { 130 nsAutoCString locale; 131 OSPreferences::GetInstance()->GetSystemLocale(locale); 132 133 ToLowerCase(locale); // use lowercase for all language atoms 134 mLocaleLanguage = NS_Atomize(locale); 135 } 136 } 137 138 return mLocaleLanguage; 139 } 140 141 nsStaticAtom* nsLanguageAtomService::GetLanguageGroup(nsAtom* aLanguage) { 142 { 143 AutoReadLock lock(mLock); 144 if (nsStaticAtom* atom = mLangToGroup.Get(aLanguage)) { 145 return atom; 146 } 147 } 148 149 AutoWriteLock lock(mLock); 150 return mLangToGroup.LookupOrInsertWith( 151 aLanguage, [&] { return GetUncachedLanguageGroup(aLanguage); }); 152 } 153 154 nsStaticAtom* nsLanguageAtomService::GetUncachedLanguageGroup( 155 nsAtom* aLanguage) const { 156 nsAutoCString langStr; 157 aLanguage->ToUTF8String(langStr); 158 ToLowerCase(langStr); 159 160 if (langStr[0] == 'x' && langStr[1] == '-') { 161 // Internal x-* langGroup codes map to themselves (see bug 256257) 162 for (nsStaticAtom* langGroup : kLangGroups) { 163 if (langGroup == aLanguage) { 164 return langGroup; 165 } 166 if (aLanguage->IsAsciiLowercase()) { 167 continue; 168 } 169 // Do the slow ascii-case-insensitive comparison just if needed. 170 nsDependentAtomString string(langGroup); 171 if (string.EqualsASCII(langStr.get(), langStr.Length())) { 172 return langGroup; 173 } 174 } 175 } else { 176 // If the lang code can be parsed as BCP47, look up its (likely) script. 177 178 // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034: 179 // First strip any private subtags that would cause Locale to reject the 180 // tag as non-wellformed. 181 nsACString::const_iterator start, end; 182 langStr.BeginReading(start); 183 langStr.EndReading(end); 184 if (FindInReadable("-x-"_ns, start, end)) { 185 // The substring we want ends at the beginning of the "-x-" subtag. 186 langStr.Truncate(start.get() - langStr.BeginReading()); 187 } 188 189 intl::Locale loc; 190 auto result = intl::LocaleParser::TryParse(langStr, loc); 191 if (!result.isOk()) { 192 // Did the author (wrongly) use '_' instead of '-' to separate subtags? 193 // If so, fix it up and re-try parsing. 194 if (langStr.Contains('_')) { 195 langStr.ReplaceChar('_', '-'); 196 197 // Throw away the partially parsed locale and re-start parsing. 198 loc = {}; 199 result = intl::LocaleParser::TryParse(langStr, loc); 200 } 201 } 202 if (result.isOk() && loc.Canonicalize().isOk()) { 203 // Fill in script subtag if not present. 204 if (loc.Script().Missing()) { 205 // No script. At this point it's fair to assume that en-* maps to 206 // x-western. This fast path avoids the slow call to AddLikelySubtags. 207 if (loc.Language().EqualTo("en")) { 208 return nsGkAtoms::x_western; 209 } 210 211 if (loc.AddLikelySubtags().isErr()) { 212 // Fall back to x-unicode if no match was found 213 return nsGkAtoms::Unicode; 214 } 215 } 216 // Traditional Chinese has separate prefs for Hong Kong / Taiwan; 217 // check the region subtag. 218 if (loc.Script().EqualTo("Hant")) { 219 if (loc.Region().EqualTo("HK")) { 220 return nsGkAtoms::HongKongChinese; 221 } 222 return nsGkAtoms::Taiwanese; 223 } 224 // Search list of known script subtags that map to langGroup codes. 225 size_t foundIndex; 226 Span<const char> scriptAsSpan = loc.Script().Span(); 227 nsDependentCSubstring script(scriptAsSpan.data(), scriptAsSpan.size()); 228 if (BinarySearchIf( 229 kScriptLangGroup, 0, std::size(kScriptLangGroup), 230 [script](const auto& entry) -> int { 231 return Compare(script, nsDependentCString(entry.mTag)); 232 }, 233 &foundIndex)) { 234 return kScriptLangGroup[foundIndex].mAtom; 235 } 236 // Script subtag was not recognized (includes "Hani"); check the language 237 // subtag for CJK possibilities so that we'll prefer the appropriate font 238 // rather than falling back to the browser's hardcoded preference. 239 if (loc.Language().EqualTo("zh")) { 240 if (loc.Region().EqualTo("HK")) { 241 return nsGkAtoms::HongKongChinese; 242 } 243 if (loc.Region().EqualTo("TW")) { 244 return nsGkAtoms::Taiwanese; 245 } 246 return nsGkAtoms::Chinese; 247 } 248 if (loc.Language().EqualTo("ja")) { 249 return nsGkAtoms::Japanese; 250 } 251 if (loc.Language().EqualTo("ko")) { 252 return nsGkAtoms::ko; 253 } 254 } 255 } 256 257 // Fall back to x-unicode if no match was found 258 return nsGkAtoms::Unicode; 259 }