tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RLBoxHunspell.cpp (8991B)


      1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "mozilla/Assertions.h"
      7 #include "RLBoxHunspell.h"
      8 #include "mozHunspellRLBoxGlue.h"
      9 #include "mozHunspellRLBoxHost.h"
     10 #include "nsThread.h"
     11 
     12 using namespace rlbox;
     13 using namespace mozilla;
     14 
     15 // Helper function for allocating and copying std::string into sandbox
     16 static tainted_hunspell<char*> allocStrInSandbox(
     17    rlbox_sandbox_hunspell& aSandbox, const std::string& str) {
     18  size_t size = str.size() + 1;
     19  tainted_hunspell<char*> t_str = aSandbox.malloc_in_sandbox<char>(size);
     20  if (t_str) {
     21    rlbox::memcpy(aSandbox, t_str, str.c_str(), size);
     22  }
     23  return t_str;
     24 }
     25 
     26 /* static */
     27 RLBoxHunspell* RLBoxHunspell::Create(const nsCString& affpath,
     28                                     const nsCString& dpath) {
     29  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
     30 
     31  mozilla::UniquePtr<rlbox_sandbox_hunspell> sandbox(
     32      new rlbox_sandbox_hunspell());
     33 
     34 #if defined(MOZ_WASM_SANDBOXING_HUNSPELL) && !defined(HAVE_64BIT_BUILD)
     35  // By default, the rlbox sandbox size is smaller on 32-bit builds than the max
     36  // 4GB. We may need to ask for a larger sandbox size for hunspell to
     37  // spellcheck in some locales See Bug 1739669 for more details
     38 
     39  // We first get the size of the dictionary. This is actually the first read we
     40  // try on dpath and it might fail for whatever filesystem reasons (invalid
     41  // path, unaccessible, ...).
     42  Result<int64_t, nsresult> dictSizeResult =
     43      mozHunspellFileMgrHost::GetSize(dpath);
     44  NS_ENSURE_TRUE(dictSizeResult.isOk(), nullptr);
     45 
     46  int64_t dictSize = dictSizeResult.unwrap();
     47  NS_ENSURE_TRUE(dictSize >= 0, nullptr);
     48 
     49  // Next, we compute the expected memory needed for hunspell spell checking.
     50  // This will vary based on the size of the dictionary file, which varies by
     51  // locale — so we size the sandbox by multiplying the file size by 4.8. This
     52  // allows the 1.5MB en_US dictionary to fit in an 8MB sandbox. See bug 1739669
     53  // and bug 1739761 for the analysis behind this.
     54  const uint64_t expectedMaxMemory = static_cast<uint64_t>(4.8 * dictSize);
     55 
     56  // Get a capacity of at least the expected size
     57  const w2c_mem_capacity capacity = get_valid_wasm2c_memory_capacity(
     58      expectedMaxMemory, true /* wasm's 32-bit memory */);
     59 
     60  bool success = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false,
     61                                         &capacity, "rlbox_wasm2c_hunspell");
     62 #elif defined(MOZ_WASM_SANDBOXING_HUNSPELL)
     63  bool success = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false);
     64 #else
     65  sandbox->create_sandbox();
     66  const bool success = true;
     67 #endif
     68 
     69  NS_ENSURE_TRUE(success, nullptr);
     70 
     71  mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> sandbox_initialized(
     72      sandbox.release());
     73 
     74  // Add the aff and dict files to allow list
     75  if (!affpath.IsEmpty()) {
     76    mozHunspellCallbacks::AllowFile(affpath);
     77  }
     78  if (!dpath.IsEmpty()) {
     79    mozHunspellCallbacks::AllowFile(dpath);
     80  }
     81 
     82  // TODO Bug 1788857: Verify error handling in case of inaccessible file
     83  return new RLBoxHunspell(std::move(sandbox_initialized), affpath, dpath);
     84 }
     85 
     86 RLBoxHunspell::RLBoxHunspell(
     87    mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> aSandbox,
     88    const nsCString& affpath, const nsCString& dpath)
     89    : mSandbox(std::move(aSandbox)), mHandle(nullptr) {
     90  // Register callbacks
     91  mCreateFilemgr =
     92      mSandbox->register_callback(mozHunspellCallbacks::CreateFilemgr);
     93  mGetLine = mSandbox->register_callback(mozHunspellCallbacks::GetLine);
     94  mGetLineNum = mSandbox->register_callback(mozHunspellCallbacks::GetLineNum);
     95  mDestructFilemgr =
     96      mSandbox->register_callback(mozHunspellCallbacks::DestructFilemgr);
     97  mHunspellToUpperCase =
     98      mSandbox->register_callback(mozHunspellCallbacks::ToUpperCase);
     99  mHunspellToLowerCase =
    100      mSandbox->register_callback(mozHunspellCallbacks::ToLowerCase);
    101  mHunspellGetCurrentCS =
    102      mSandbox->register_callback(mozHunspellCallbacks::GetCurrentCS);
    103 
    104  mSandbox->invoke_sandbox_function(RegisterHunspellCallbacks, mCreateFilemgr,
    105                                    mGetLine, mGetLineNum, mDestructFilemgr,
    106                                    mHunspellToUpperCase, mHunspellToLowerCase,
    107                                    mHunspellGetCurrentCS);
    108 
    109  // Copy the affpath and dpath into the sandbox
    110  // These allocations should definitely succeed as these are first allocations
    111  // inside the sandbox.
    112  tainted_hunspell<char*> t_affpath =
    113      allocStrInSandbox(*mSandbox, affpath.get());
    114  MOZ_RELEASE_ASSERT(t_affpath);
    115 
    116  tainted_hunspell<char*> t_dpath = allocStrInSandbox(*mSandbox, dpath.get());
    117  MOZ_RELEASE_ASSERT(t_dpath);
    118 
    119  // Create handle
    120  mHandle = mSandbox->invoke_sandbox_function(
    121      Hunspell_create, rlbox::sandbox_const_cast<const char*>(t_affpath),
    122      rlbox::sandbox_const_cast<const char*>(t_dpath));
    123  MOZ_RELEASE_ASSERT(mHandle);
    124 
    125  mSandbox->free_in_sandbox(t_dpath);
    126  mSandbox->free_in_sandbox(t_affpath);
    127 
    128  // Get dictionary encoding
    129  tainted_hunspell<char*> t_enc =
    130      mSandbox->invoke_sandbox_function(Hunspell_get_dic_encoding, mHandle);
    131  t_enc.copy_and_verify_string([&](std::unique_ptr<char[]> enc) {
    132    size_t len = std::strlen(enc.get());
    133    mDicEncoding = std::string(enc.get(), len);
    134  });
    135 }
    136 
// Tears down the sandboxed Hunspell instance: destroys the handle,
// unregisters every callback registered by the constructor, then clears the
// shared callback state. Diagnostic-asserts that it runs on the main thread.
RLBoxHunspell::~RLBoxHunspell() {
  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
  // Call hunspell's destroy which frees mHandle
  // NOTE(review): this runs before the callbacks are unregistered; presumably
  // destroy may still call back into the host - confirm before reordering.
  mSandbox->invoke_sandbox_function(Hunspell_destroy, mHandle);
  mHandle = nullptr;

  // Unregister callbacks
  mDestructFilemgr.unregister();
  mGetLineNum.unregister();
  mGetLine.unregister();
  mCreateFilemgr.unregister();
  mHunspellToUpperCase.unregister();
  mHunspellToLowerCase.unregister();
  mHunspellGetCurrentCS.unregister();

  // Clear any callback data and allow list
  mozHunspellCallbacks::Clear();
}
    155 
    156 // Invoking hunspell with words larger than a certain size will cause the
    157 // Hunspell sandbox to run out of memory. So we pick an arbitrary limit of
    158 // 200000 here to ensure this doesn't happen.
    159 static const size_t gWordSizeLimit = 200000;
    160 
    161 int RLBoxHunspell::spell(const std::string& stdWord) {
    162  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
    163 
    164  const int ok = 1;
    165 
    166  if (stdWord.length() >= gWordSizeLimit) {
    167    // Fail gracefully assuming the word is spelt correctly
    168    return ok;
    169  }
    170 
    171  // Copy word into the sandbox
    172  tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord);
    173  if (!t_word) {
    174    // Ran out of memory in the hunspell sandbox
    175    // Fail gracefully assuming the word is spelt correctly
    176    return ok;
    177  }
    178 
    179  // Check word
    180  int good = mSandbox
    181                 ->invoke_sandbox_function(
    182                     Hunspell_spell, mHandle,
    183                     rlbox::sandbox_const_cast<const char*>(t_word))
    184                 .copy_and_verify([](int good) { return good; });
    185  mSandbox->free_in_sandbox(t_word);
    186  return good;
    187 }
    188 
    189 const std::string& RLBoxHunspell::get_dict_encoding() const {
    190  return mDicEncoding;
    191 }
    192 
    193 // This function fails gracefully - if we run out of memory in the hunspell
    194 // sandbox, we return empty suggestion list
    195 std::vector<std::string> RLBoxHunspell::suggest(const std::string& stdWord) {
    196  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
    197 
    198  if (stdWord.length() >= gWordSizeLimit) {
    199    return {};
    200  }
    201 
    202  // Copy word into the sandbox
    203  tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord);
    204  if (!t_word) {
    205    return {};
    206  }
    207 
    208  // Allocate suggestion list in the sandbox
    209  tainted_hunspell<char***> t_slst = mSandbox->malloc_in_sandbox<char**>();
    210  if (!t_slst) {
    211    // Free the earlier allocation
    212    mSandbox->free_in_sandbox(t_word);
    213    return {};
    214  }
    215 
    216  *t_slst = nullptr;
    217 
    218  // Get suggestions
    219  int nr = mSandbox
    220               ->invoke_sandbox_function(
    221                   Hunspell_suggest, mHandle, t_slst,
    222                   rlbox::sandbox_const_cast<const char*>(t_word))
    223               .copy_and_verify([](int nr) {
    224                 MOZ_RELEASE_ASSERT(nr >= 0);
    225                 return nr;
    226               });
    227 
    228  tainted_hunspell<char**> t_slst_ref = *t_slst;
    229 
    230  std::vector<std::string> suggestions;
    231  if (nr > 0 && t_slst_ref != nullptr) {
    232    // Copy suggestions from sandbox
    233    suggestions.reserve(nr);
    234 
    235    for (int i = 0; i < nr; i++) {
    236      tainted_hunspell<char*> t_sug = t_slst_ref[i];
    237 
    238      if (t_sug) {
    239        t_sug.copy_and_verify_string(
    240            [&](std::string sug) { suggestions.push_back(std::move(sug)); });
    241        // free the suggestion string allocated by the sandboxed hunspell
    242        mSandbox->free_in_sandbox(t_sug);
    243      }
    244    }
    245 
    246    // free the suggestion list allocated by the sandboxed hunspell
    247    mSandbox->free_in_sandbox(t_slst_ref);
    248  }
    249 
    250  mSandbox->free_in_sandbox(t_word);
    251  mSandbox->free_in_sandbox(t_slst);
    252  return suggestions;
    253 }