RLBoxHunspell.cpp (8991B)
1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "mozilla/Assertions.h" 7 #include "RLBoxHunspell.h" 8 #include "mozHunspellRLBoxGlue.h" 9 #include "mozHunspellRLBoxHost.h" 10 #include "nsThread.h" 11 12 using namespace rlbox; 13 using namespace mozilla; 14 15 // Helper function for allocating and copying std::string into sandbox 16 static tainted_hunspell<char*> allocStrInSandbox( 17 rlbox_sandbox_hunspell& aSandbox, const std::string& str) { 18 size_t size = str.size() + 1; 19 tainted_hunspell<char*> t_str = aSandbox.malloc_in_sandbox<char>(size); 20 if (t_str) { 21 rlbox::memcpy(aSandbox, t_str, str.c_str(), size); 22 } 23 return t_str; 24 } 25 26 /* static */ 27 RLBoxHunspell* RLBoxHunspell::Create(const nsCString& affpath, 28 const nsCString& dpath) { 29 MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); 30 31 mozilla::UniquePtr<rlbox_sandbox_hunspell> sandbox( 32 new rlbox_sandbox_hunspell()); 33 34 #if defined(MOZ_WASM_SANDBOXING_HUNSPELL) && !defined(HAVE_64BIT_BUILD) 35 // By default, the rlbox sandbox size is smaller on 32-bit builds than the max 36 // 4GB. We may need to ask for a larger sandbox size for hunspell to 37 // spellcheck in some locales See Bug 1739669 for more details 38 39 // We first get the size of the dictionary. This is actually the first read we 40 // try on dpath and it might fail for whatever filesystem reasons (invalid 41 // path, unaccessible, ...). 42 Result<int64_t, nsresult> dictSizeResult = 43 mozHunspellFileMgrHost::GetSize(dpath); 44 NS_ENSURE_TRUE(dictSizeResult.isOk(), nullptr); 45 46 int64_t dictSize = dictSizeResult.unwrap(); 47 NS_ENSURE_TRUE(dictSize >= 0, nullptr); 48 49 // Next, we compute the expected memory needed for hunspell spell checking. 50 // This will vary based on the size of the dictionary file, which varies by 51 // locale — so we size the sandbox by multiplying the file size by 4.8. This 52 // allows the 1.5MB en_US dictionary to fit in an 8MB sandbox. See bug 1739669 53 // and bug 1739761 for the analysis behind this. 54 const uint64_t expectedMaxMemory = static_cast<uint64_t>(4.8 * dictSize); 55 56 // Get a capacity of at least the expected size 57 const w2c_mem_capacity capacity = get_valid_wasm2c_memory_capacity( 58 expectedMaxMemory, true /* wasm's 32-bit memory */); 59 60 bool success = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false, 61 &capacity, "rlbox_wasm2c_hunspell"); 62 #elif defined(MOZ_WASM_SANDBOXING_HUNSPELL) 63 bool success = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false); 64 #else 65 sandbox->create_sandbox(); 66 const bool success = true; 67 #endif 68 69 NS_ENSURE_TRUE(success, nullptr); 70 71 mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> sandbox_initialized( 72 sandbox.release()); 73 74 // Add the aff and dict files to allow list 75 if (!affpath.IsEmpty()) { 76 mozHunspellCallbacks::AllowFile(affpath); 77 } 78 if (!dpath.IsEmpty()) { 79 mozHunspellCallbacks::AllowFile(dpath); 80 } 81 82 // TODO Bug 1788857: Verify error handling in case of inaccessible file 83 return new RLBoxHunspell(std::move(sandbox_initialized), affpath, dpath); 84 } 85 86 RLBoxHunspell::RLBoxHunspell( 87 mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> aSandbox, 88 const nsCString& affpath, const nsCString& dpath) 89 : mSandbox(std::move(aSandbox)), mHandle(nullptr) { 90 // Register callbacks 91 mCreateFilemgr = 92 mSandbox->register_callback(mozHunspellCallbacks::CreateFilemgr); 93 mGetLine = mSandbox->register_callback(mozHunspellCallbacks::GetLine); 94 mGetLineNum = mSandbox->register_callback(mozHunspellCallbacks::GetLineNum); 95 mDestructFilemgr = 96 mSandbox->register_callback(mozHunspellCallbacks::DestructFilemgr); 97 mHunspellToUpperCase = 98 mSandbox->register_callback(mozHunspellCallbacks::ToUpperCase); 99 mHunspellToLowerCase = 100 mSandbox->register_callback(mozHunspellCallbacks::ToLowerCase); 101 mHunspellGetCurrentCS = 102 mSandbox->register_callback(mozHunspellCallbacks::GetCurrentCS); 103 104 mSandbox->invoke_sandbox_function(RegisterHunspellCallbacks, mCreateFilemgr, 105 mGetLine, mGetLineNum, mDestructFilemgr, 106 mHunspellToUpperCase, mHunspellToLowerCase, 107 mHunspellGetCurrentCS); 108 109 // Copy the affpath and dpath into the sandbox 110 // These allocations should definitely succeed as these are first allocations 111 // inside the sandbox. 112 tainted_hunspell<char*> t_affpath = 113 allocStrInSandbox(*mSandbox, affpath.get()); 114 MOZ_RELEASE_ASSERT(t_affpath); 115 116 tainted_hunspell<char*> t_dpath = allocStrInSandbox(*mSandbox, dpath.get()); 117 MOZ_RELEASE_ASSERT(t_dpath); 118 119 // Create handle 120 mHandle = mSandbox->invoke_sandbox_function( 121 Hunspell_create, rlbox::sandbox_const_cast<const char*>(t_affpath), 122 rlbox::sandbox_const_cast<const char*>(t_dpath)); 123 MOZ_RELEASE_ASSERT(mHandle); 124 125 mSandbox->free_in_sandbox(t_dpath); 126 mSandbox->free_in_sandbox(t_affpath); 127 128 // Get dictionary encoding 129 tainted_hunspell<char*> t_enc = 130 mSandbox->invoke_sandbox_function(Hunspell_get_dic_encoding, mHandle); 131 t_enc.copy_and_verify_string([&](std::unique_ptr<char[]> enc) { 132 size_t len = std::strlen(enc.get()); 133 mDicEncoding = std::string(enc.get(), len); 134 }); 135 } 136 137 RLBoxHunspell::~RLBoxHunspell() { 138 MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); 139 // Call hunspell's destroy which frees mHandle 140 mSandbox->invoke_sandbox_function(Hunspell_destroy, mHandle); 141 mHandle = nullptr; 142 143 // Unregister callbacks 144 mDestructFilemgr.unregister(); 145 mGetLineNum.unregister(); 146 mGetLine.unregister(); 147 mCreateFilemgr.unregister(); 148 mHunspellToUpperCase.unregister(); 149 mHunspellToLowerCase.unregister(); 150 mHunspellGetCurrentCS.unregister(); 151 152 // Clear any callback data and allow list 153 mozHunspellCallbacks::Clear(); 154 } 155 156 // Invoking hunspell with words larger than a certain size will cause the 157 // Hunspell sandbox to run out of memory. So we pick an arbitrary limit of 158 // 200000 here to ensure this doesn't happen. 159 static const size_t gWordSizeLimit = 200000; 160 161 int RLBoxHunspell::spell(const std::string& stdWord) { 162 MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); 163 164 const int ok = 1; 165 166 if (stdWord.length() >= gWordSizeLimit) { 167 // Fail gracefully assuming the word is spelt correctly 168 return ok; 169 } 170 171 // Copy word into the sandbox 172 tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord); 173 if (!t_word) { 174 // Ran out of memory in the hunspell sandbox 175 // Fail gracefully assuming the word is spelt correctly 176 return ok; 177 } 178 179 // Check word 180 int good = mSandbox 181 ->invoke_sandbox_function( 182 Hunspell_spell, mHandle, 183 rlbox::sandbox_const_cast<const char*>(t_word)) 184 .copy_and_verify([](int good) { return good; }); 185 mSandbox->free_in_sandbox(t_word); 186 return good; 187 } 188 189 const std::string& RLBoxHunspell::get_dict_encoding() const { 190 return mDicEncoding; 191 } 192 193 // This function fails gracefully - if we run out of memory in the hunspell 194 // sandbox, we return empty suggestion list 195 std::vector<std::string> RLBoxHunspell::suggest(const std::string& stdWord) { 196 MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); 197 198 if (stdWord.length() >= gWordSizeLimit) { 199 return {}; 200 } 201 202 // Copy word into the sandbox 203 tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord); 204 if (!t_word) { 205 return {}; 206 } 207 208 // Allocate suggestion list in the sandbox 209 tainted_hunspell<char***> t_slst = mSandbox->malloc_in_sandbox<char**>(); 210 if (!t_slst) { 211 // Free the earlier allocation 212 mSandbox->free_in_sandbox(t_word); 213 return {}; 214 } 215 216 *t_slst = nullptr; 217 218 // Get suggestions 219 int nr = mSandbox 220 ->invoke_sandbox_function( 221 Hunspell_suggest, mHandle, t_slst, 222 rlbox::sandbox_const_cast<const char*>(t_word)) 223 .copy_and_verify([](int nr) { 224 MOZ_RELEASE_ASSERT(nr >= 0); 225 return nr; 226 }); 227 228 tainted_hunspell<char**> t_slst_ref = *t_slst; 229 230 std::vector<std::string> suggestions; 231 if (nr > 0 && t_slst_ref != nullptr) { 232 // Copy suggestions from sandbox 233 suggestions.reserve(nr); 234 235 for (int i = 0; i < nr; i++) { 236 tainted_hunspell<char*> t_sug = t_slst_ref[i]; 237 238 if (t_sug) { 239 t_sug.copy_and_verify_string( 240 [&](std::string sug) { suggestions.push_back(std::move(sug)); }); 241 // free the suggestion string allocated by the sandboxed hunspell 242 mSandbox->free_in_sandbox(t_sug); 243 } 244 } 245 246 // free the suggestion list allocated by the sandboxed hunspell 247 mSandbox->free_in_sandbox(t_slst_ref); 248 } 249 250 mSandbox->free_in_sandbox(t_word); 251 mSandbox->free_in_sandbox(t_slst); 252 return suggestions; 253 }