mozHunspell.cpp (18078B)
1 /******* BEGIN LICENSE BLOCK ******* 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * The contents of this file are subject to the Mozilla Public License Version 5 * 1.1 (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * http://www.mozilla.org/MPL/ 8 * 9 * Software distributed under the License is distributed on an "AS IS" basis, 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 * for the specific language governing rights and limitations under the 12 * License. 13 * 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 15 * and László Németh (Hunspell). Portions created by the Initial Developers 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 17 * 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 19 * David Einstein (deinst@world.std.com) 20 * Michiel van Leeuwen (mvl@exedo.nl) 21 * Caolan McNamara (cmc@openoffice.org) 22 * László Németh (nemethl@gyorsposta.hu) 23 * Davide Prina 24 * Giuseppe Modugno 25 * Gianluca Turconi 26 * Simon Brouwer 27 * Noll Janos 28 * Biro Arpad 29 * Goldman Eleonora 30 * Sarlos Tamas 31 * Bencsath Boldizsar 32 * Halacsy Peter 33 * Dvornik Laszlo 34 * Gefferth Andras 35 * Nagy Viktor 36 * Varga Daniel 37 * Chris Halls 38 * Rene Engelhard 39 * Bram Moolenaar 40 * Dafydd Jones 41 * Harri Pitkanen 42 * Andras Timar 43 * Tor Lillqvist 44 * Jesper Kristensen (mail@jesperkristensen.dk) 45 * 46 * Alternatively, the contents of this file may be used under the terms of 47 * either the GNU General Public License Version 2 or later (the "GPL"), or 48 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 49 * in which case the provisions of the GPL or the LGPL are applicable instead 50 * of those above. If you wish to allow use of your version of this file only 51 * under the terms of either the GPL or the LGPL, and not to allow others to 52 * use your version of this file under the terms of the MPL, indicate your 53 * decision by deleting the provisions above and replace them with the notice 54 * and other provisions required by the GPL or the LGPL. If you do not delete 55 * the provisions above, a recipient may use your version of this file under 56 * the terms of any one of the MPL, the GPL or the LGPL. 57 * 58 ******* END LICENSE BLOCK *******/ 59 60 #include "mozHunspell.h" 61 #include "nsReadableUtils.h" 62 #include "nsString.h" 63 #include "nsIObserverService.h" 64 #include "nsIDirectoryEnumerator.h" 65 #include "nsIFile.h" 66 #include "nsUnicharUtils.h" 67 #include "nsCRT.h" 68 #include "mozInlineSpellChecker.h" 69 #include "nsIPrefBranch.h" 70 #include "nsIPrefService.h" 71 #include "nsNetUtil.h" 72 #include "prenv.h" 73 #include "mozilla/Components.h" 74 #include "mozilla/Services.h" 75 #include "mozilla/dom/ContentParent_NotifyUpdatedDictionaries.h" 76 77 #include <tuple> 78 79 using namespace mozilla; 80 81 NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell) 82 NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell) 83 84 NS_INTERFACE_MAP_BEGIN(mozHunspell) 85 NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine) 86 NS_INTERFACE_MAP_ENTRY(nsIObserver) 87 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) 88 NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter) 89 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine) 90 NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell) 91 NS_INTERFACE_MAP_END 92 93 NS_IMPL_CYCLE_COLLECTION_WEAK(mozHunspell, mPersonalDictionary) 94 95 NS_IMPL_COMPONENT_FACTORY(mozHunspell) { 96 auto hunspell = MakeRefPtr<mozHunspell>(); 97 if (NS_SUCCEEDED(hunspell->Init())) { 98 return hunspell.forget().downcast<mozISpellCheckingEngine>(); 99 } 100 return nullptr; 101 } 102 103 mozHunspell::mozHunspell() { 104 #ifdef DEBUG 105 // There must be only one instance of this class: it reports memory based on 106 // a single static count in HunspellAllocator. 107 static bool hasRun = false; 108 MOZ_ASSERT(!hasRun); 109 hasRun = true; 110 #endif 111 } 112 113 nsresult mozHunspell::Init() { 114 LoadDictionaryList(false); 115 116 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); 117 if (obs) { 118 obs->AddObserver(this, "profile-do-change", true); 119 obs->AddObserver(this, "profile-after-change", true); 120 } 121 122 mozilla::RegisterWeakMemoryReporter(this); 123 124 return NS_OK; 125 } 126 127 mozHunspell::~mozHunspell() { 128 mozilla::UnregisterWeakMemoryReporter(this); 129 130 mPersonalDictionary = nullptr; 131 mHunspells.Clear(); 132 } 133 134 NS_IMETHODIMP 135 mozHunspell::GetDictionaries(nsTArray<nsCString>& aDictionaries) { 136 MOZ_ASSERT(aDictionaries.IsEmpty()); 137 for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { 138 if (iter.Data().mEnabled) { 139 aDictionaries.AppendElement(iter.Key()); 140 } 141 } 142 return NS_OK; 143 } 144 145 /* Set the Dictionaries. 146 * This also Loads the dictionaries and initializes the converter using the 147 * dictionaries converter 148 */ 149 NS_IMETHODIMP 150 mozHunspell::SetDictionaries(const nsTArray<nsCString>& aDictionaries) { 151 if (aDictionaries.IsEmpty()) { 152 mHunspells.Clear(); 153 return NS_OK; 154 } 155 156 // Disable any dictionaries we've already loaded that we're not 157 // going to use. 158 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { 159 if (!aDictionaries.Contains(iter.Key())) { 160 iter.Data().mEnabled = false; 161 } 162 } 163 164 bool firstDictionary = true; 165 for (const auto& dictionary : aDictionaries) { 166 NS_ConvertUTF8toUTF16 dict(dictionary); 167 nsIURI* affFile = mDictionaries.GetWeak(dict); 168 if (!affFile) { 169 return NS_ERROR_FILE_NOT_FOUND; 170 } 171 172 nsAutoCString affFileName; 173 nsresult rv = affFile->GetSpec(affFileName); 174 NS_ENSURE_SUCCESS(rv, rv); 175 176 if (auto entry = mHunspells.Lookup(dictionary)) { 177 if (entry.Data().mAffixFileName == affFileName) { 178 entry.Data().mEnabled = true; 179 continue; 180 } 181 } 182 183 DictionaryData dictionaryData; 184 dictionaryData.mAffixFileName = affFileName; 185 186 // Load the first dictionary now, we'll load the others lazily during 187 // checking. 188 if (firstDictionary) { 189 rv = dictionaryData.LoadIfNecessary(); 190 NS_ENSURE_SUCCESS(rv, rv); 191 firstDictionary = false; 192 } 193 194 mHunspells.InsertOrUpdate(dictionary, std::move(dictionaryData)); 195 } 196 197 // If we have a large number of dictionaries loaded, try freeing any disabled 198 // dictionaries to limit memory use. 199 if (mHunspells.Count() > 10) { 200 mHunspells.RemoveIf([](const auto& iter) { return !iter.Data().mEnabled; }); 201 } 202 203 return NS_OK; 204 } 205 206 NS_IMETHODIMP mozHunspell::GetPersonalDictionary( 207 mozIPersonalDictionary** aPersonalDictionary) { 208 *aPersonalDictionary = mPersonalDictionary; 209 NS_IF_ADDREF(*aPersonalDictionary); 210 return NS_OK; 211 } 212 213 NS_IMETHODIMP mozHunspell::SetPersonalDictionary( 214 mozIPersonalDictionary* aPersonalDictionary) { 215 mPersonalDictionary = aPersonalDictionary; 216 return NS_OK; 217 } 218 219 NS_IMETHODIMP mozHunspell::GetDictionaryList( 220 nsTArray<nsCString>& aDictionaries) { 221 MOZ_ASSERT(aDictionaries.IsEmpty()); 222 for (const auto& key : mDictionaries.Keys()) { 223 aDictionaries.AppendElement(NS_ConvertUTF16toUTF8(key)); 224 } 225 226 return NS_OK; 227 } 228 229 void mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses) { 230 mDictionaries.Clear(); 231 232 nsresult rv; 233 234 // find built in dictionaries, or dictionaries specified in 235 // spellchecker.dictionary_path in prefs 236 nsCOMPtr<nsIFile> dictDir; 237 238 // check preferences first 239 nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID)); 240 if (prefs) { 241 nsAutoCString extDictPath; 242 rv = prefs->GetCharPref("spellchecker.dictionary_path", extDictPath); 243 if (NS_SUCCEEDED(rv)) { 244 // set the spellchecker.dictionary_path 245 rv = NS_NewNativeLocalFile(extDictPath, getter_AddRefs(dictDir)); 246 } 247 if (dictDir) { 248 LoadDictionariesFromDir(dictDir); 249 } 250 } 251 252 // find dictionaries in DICPATH 253 char* dicEnv = PR_GetEnv("DICPATH"); 254 if (dicEnv) { 255 // do a two-pass dance so dictionaries are loaded right-to-left as 256 // preference 257 nsTArray<nsCOMPtr<nsIFile>> dirs; 258 nsAutoCString env(dicEnv); // assume dicEnv is UTF-8 259 260 char* currPath = nullptr; 261 char* nextPaths = env.BeginWriting(); 262 while ((currPath = NS_strtok(":", &nextPaths))) { 263 nsCOMPtr<nsIFile> dir; 264 rv = NS_NewNativeLocalFile(nsCString(currPath), getter_AddRefs(dir)); 265 if (NS_SUCCEEDED(rv)) { 266 dirs.AppendElement(dir); 267 } 268 } 269 270 // load them in reverse order so they override each other properly 271 for (int32_t i = dirs.Length() - 1; i >= 0; i--) { 272 LoadDictionariesFromDir(dirs[i]); 273 } 274 } 275 276 // find dictionaries from restartless extensions 277 for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) { 278 LoadDictionariesFromDir(mDynamicDirectories[i]); 279 } 280 281 for (const auto& dictionaryEntry : mDynamicDictionaries) { 282 mDictionaries.InsertOrUpdate(dictionaryEntry.GetKey(), 283 dictionaryEntry.GetData()); 284 } 285 286 DictionariesChanged(aNotifyChildProcesses); 287 } 288 289 void mozHunspell::DictionariesChanged(bool aNotifyChildProcesses) { 290 // Now we have finished updating the list of dictionaries, update the current 291 // dictionary and any editors which may use it. 292 mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking(); 293 294 if (aNotifyChildProcesses) { 295 mozilla::dom::ContentParent_NotifyUpdatedDictionaries(); 296 } 297 298 // Check if the current dictionaries are still available. 299 // If not, try to replace it with other dictionaries of the same language. 300 if (!mHunspells.IsEmpty()) { 301 nsTArray<nsCString> dictionaries; 302 for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { 303 if (iter.Data().mEnabled) { 304 dictionaries.AppendElement(iter.Key()); 305 } 306 } 307 nsresult rv = SetDictionaries(dictionaries); 308 if (NS_SUCCEEDED(rv)) return; 309 } 310 311 // If the current dictionaries are gone, and we don't have a good replacement, 312 // set no current dictionary. 313 if (!mHunspells.IsEmpty()) { 314 nsTArray<nsCString> empty; 315 SetDictionaries(empty); 316 } 317 } 318 319 NS_IMETHODIMP 320 mozHunspell::LoadDictionariesFromDir(nsIFile* aDir) { 321 nsresult rv; 322 323 bool check = false; 324 rv = aDir->Exists(&check); 325 if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; 326 327 rv = aDir->IsDirectory(&check); 328 if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; 329 330 nsCOMPtr<nsIDirectoryEnumerator> files; 331 rv = aDir->GetDirectoryEntries(getter_AddRefs(files)); 332 if (NS_FAILED(rv)) return NS_ERROR_UNEXPECTED; 333 334 nsCOMPtr<nsIFile> file; 335 while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) { 336 nsAutoString leafName; 337 file->GetLeafName(leafName); 338 if (!StringEndsWith(leafName, u".dic"_ns)) continue; 339 340 nsAutoString dict(leafName); 341 dict.SetLength(dict.Length() - 4); // magic length of ".dic" 342 343 // check for the presence of the .aff file 344 leafName = dict; 345 leafName.AppendLiteral(".aff"); 346 file->SetLeafName(leafName); 347 rv = file->Exists(&check); 348 if (NS_FAILED(rv) || !check) continue; 349 350 // Replace '_' separator with '-' 351 dict.ReplaceChar('_', '-'); 352 353 nsCOMPtr<nsIURI> uri; 354 rv = NS_NewFileURI(getter_AddRefs(uri), file); 355 NS_ENSURE_SUCCESS(rv, rv); 356 357 mDictionaries.InsertOrUpdate(dict, uri); 358 } 359 360 return NS_OK; 361 } 362 363 nsresult mozHunspell::DictionaryData::ConvertCharset(const nsAString& aStr, 364 std::string& aDst) { 365 if (NS_WARN_IF(!mEncoder)) { 366 return NS_ERROR_NOT_INITIALIZED; 367 } 368 369 auto src = Span(aStr.BeginReading(), aStr.Length()); 370 CheckedInt<size_t> needed = 371 mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(src.Length()); 372 if (!needed.isValid()) { 373 return NS_ERROR_OUT_OF_MEMORY; 374 } 375 376 aDst.resize(needed.value()); 377 378 char* dstPtr = &aDst[0]; 379 auto dst = Span(reinterpret_cast<uint8_t*>(dstPtr), needed.value()); 380 381 uint32_t result; 382 size_t written; 383 std::tie(result, std::ignore, written) = 384 mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); 385 MOZ_ASSERT(result != kOutputFull); 386 if (result != kInputEmpty) { 387 return NS_ERROR_UENC_NOMAPPING; 388 } 389 aDst.resize(written); 390 mEncoder->Encoding()->NewEncoderInto(*mEncoder); 391 return NS_OK; 392 } 393 394 nsresult mozHunspell::DictionaryData::LoadIfNecessary() { 395 if (mHunspell && mEncoder && mDecoder) { 396 return NS_OK; 397 } 398 399 if (mLoadFailed) { 400 return NS_ERROR_FAILURE; 401 } 402 403 nsCString dictFileName = mAffixFileName; 404 int32_t dotPos = dictFileName.RFindChar('.'); 405 if (dotPos == -1) { 406 mLoadFailed = true; 407 return NS_ERROR_FAILURE; 408 } 409 dictFileName.SetLength(dotPos); 410 dictFileName.AppendLiteral(".dic"); 411 412 UniquePtr<RLBoxHunspell> hunspell( 413 RLBoxHunspell::Create(mAffixFileName, dictFileName)); 414 if (!hunspell) { 415 mLoadFailed = true; 416 // TODO Bug 1788857: Verify error propagation in case of inaccessible file 417 return NS_ERROR_OUT_OF_MEMORY; 418 } 419 mHunspell = std::move(hunspell); 420 auto encoding = 421 Encoding::ForLabelNoReplacement(mHunspell->get_dict_encoding()); 422 if (!encoding) { 423 mLoadFailed = true; 424 return NS_ERROR_UCONV_NOCONV; 425 } 426 mEncoder = encoding->NewEncoder(); 427 mDecoder = encoding->NewDecoderWithoutBOMHandling(); 428 return NS_OK; 429 } 430 431 NS_IMETHODIMP 432 mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport, 433 nsISupports* aData, bool aAnonymize) { 434 MOZ_COLLECT_REPORT("explicit/spell-check", KIND_HEAP, UNITS_BYTES, 435 HunspellAllocator::MemoryAllocated(), 436 "Memory used by the spell-checking engine."); 437 438 return NS_OK; 439 } 440 441 NS_IMETHODIMP 442 mozHunspell::Check(const nsAString& aWord, bool* aResult) { 443 if (NS_WARN_IF(!aResult)) { 444 return NS_ERROR_INVALID_ARG; 445 } 446 447 if (NS_WARN_IF(mHunspells.IsEmpty())) { 448 return NS_ERROR_FAILURE; 449 } 450 451 *aResult = true; 452 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { 453 if (!iter.Data().mEnabled) { 454 continue; 455 } 456 457 nsresult rv = iter.Data().LoadIfNecessary(); 458 if (NS_FAILED(rv)) { 459 continue; 460 } 461 462 std::string charsetWord; 463 rv = iter.Data().ConvertCharset(aWord, charsetWord); 464 if (NS_FAILED(rv)) { 465 continue; 466 } 467 468 // Depending upon the encoding, we might end up with a string that begins 469 // with the null byte. Since the hunspell interface uses C-style strings, 470 // this appears like an empty string, and hunspell marks empty strings as 471 // spelled correctly. Skip these cases to allow another dictionary to have 472 // the chance to spellcheck them. 473 if (charsetWord.empty() || charsetWord[0] == 0) { 474 continue; 475 } 476 477 *aResult = iter.Data().mHunspell->spell(charsetWord); 478 if (*aResult) { 479 break; 480 } 481 } 482 483 if (!*aResult && mPersonalDictionary) { 484 return mPersonalDictionary->Check(aWord, aResult); 485 } 486 487 return NS_OK; 488 } 489 490 NS_IMETHODIMP 491 mozHunspell::Suggest(const nsAString& aWord, nsTArray<nsString>& aSuggestions) { 492 if (NS_WARN_IF(mHunspells.IsEmpty())) { 493 return NS_ERROR_FAILURE; 494 } 495 496 MOZ_ASSERT(aSuggestions.IsEmpty()); 497 498 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { 499 if (!iter.Data().mEnabled) { 500 continue; 501 } 502 503 nsresult rv = iter.Data().LoadIfNecessary(); 504 if (NS_FAILED(rv)) { 505 continue; 506 } 507 508 std::string charsetWord; 509 rv = iter.Data().ConvertCharset(aWord, charsetWord); 510 NS_ENSURE_SUCCESS(rv, rv); 511 512 std::vector<std::string> suggestions = 513 iter.Data().mHunspell->suggest(charsetWord); 514 if (!suggestions.empty()) { 515 aSuggestions.SetCapacity(aSuggestions.Length() + suggestions.size()); 516 for (Span<const char> charSrc : suggestions) { 517 // Convert the suggestion to utf16 518 auto src = AsBytes(charSrc); 519 nsresult rv = 520 iter.Data().mDecoder->Encoding()->DecodeWithoutBOMHandling( 521 src, *aSuggestions.AppendElement()); 522 NS_ENSURE_SUCCESS(rv, rv); 523 iter.Data().mDecoder->Encoding()->NewDecoderWithoutBOMHandlingInto( 524 *iter.Data().mDecoder); 525 } 526 } 527 } 528 529 return NS_OK; 530 } 531 532 NS_IMETHODIMP 533 mozHunspell::Observe(nsISupports* aSubj, const char* aTopic, 534 const char16_t* aData) { 535 NS_ASSERTION(!strcmp(aTopic, "profile-do-change") || 536 !strcmp(aTopic, "profile-after-change"), 537 "Unexpected observer topic"); 538 539 LoadDictionaryList(false); 540 541 return NS_OK; 542 } 543 544 NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile* aDir) { 545 mDynamicDirectories.AppendObject(aDir); 546 LoadDictionaryList(true); 547 return NS_OK; 548 } 549 550 NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile* aDir) { 551 mDynamicDirectories.RemoveObject(aDir); 552 LoadDictionaryList(true); 553 554 #ifdef MOZ_THUNDERBIRD 555 /* 556 * This notification is needed for Thunderbird. Thunderbird derives the 557 * dictionary from the document's "lang" attribute. If a dictionary is 558 * removed, we need to change the "lang" attribute. 559 */ 560 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); 561 if (obs) { 562 obs->NotifyObservers(nullptr, SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION, 563 nullptr); 564 } 565 #endif 566 return NS_OK; 567 } 568 569 NS_IMETHODIMP mozHunspell::AddDictionary(const nsAString& aLang, 570 nsIURI* aFile) { 571 NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); 572 573 mDynamicDictionaries.InsertOrUpdate(aLang, aFile); 574 mDictionaries.InsertOrUpdate(aLang, aFile); 575 DictionariesChanged(true); 576 return NS_OK; 577 } 578 579 NS_IMETHODIMP mozHunspell::RemoveDictionary(const nsAString& aLang, 580 nsIURI* aFile, bool* aRetVal) { 581 NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); 582 *aRetVal = false; 583 584 nsCOMPtr<nsIURI> file = mDynamicDictionaries.Get(aLang); 585 bool equal; 586 if (file && NS_SUCCEEDED(file->Equals(aFile, &equal)) && equal) { 587 mDynamicDictionaries.Remove(aLang); 588 LoadDictionaryList(true); 589 *aRetVal = true; 590 } 591 return NS_OK; 592 }