tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hashmgr.hxx (7809B)


      1 /* ***** BEGIN LICENSE BLOCK *****
      2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      3 *
      4 * Copyright (C) 2002-2022 Németh László
      5 *
      6 * The contents of this file are subject to the Mozilla Public License Version
      7 * 1.1 (the "License"); you may not use this file except in compliance with
      8 * the License. You may obtain a copy of the License at
      9 * http://www.mozilla.org/MPL/
     10 *
     11 * Software distributed under the License is distributed on an "AS IS" basis,
     12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     13 * for the specific language governing rights and limitations under the
     14 * License.
     15 *
     16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
     17 *
     18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
     19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
     20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
     21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
     22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
     23 *
     24 * Alternatively, the contents of this file may be used under the terms of
     25 * either the GNU General Public License Version 2 or later (the "GPL"), or
     26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     27 * in which case the provisions of the GPL or the LGPL are applicable instead
     28 * of those above. If you wish to allow use of your version of this file only
     29 * under the terms of either the GPL or the LGPL, and not to allow others to
     30 * use your version of this file under the terms of the MPL, indicate your
     31 * decision by deleting the provisions above and replace them with the notice
     32 * and other provisions required by the GPL or the LGPL. If you do not delete
     33 * the provisions above, a recipient may use your version of this file under
     34 * the terms of any one of the MPL, the GPL or the LGPL.
     35 *
     36 * ***** END LICENSE BLOCK ***** */
     37 /*
     38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
     39 * And Contributors.  All rights reserved.
     40 *
     41 * Redistribution and use in source and binary forms, with or without
     42 * modification, are permitted provided that the following conditions
     43 * are met:
     44 *
     45 * 1. Redistributions of source code must retain the above copyright
     46 *    notice, this list of conditions and the following disclaimer.
     47 *
     48 * 2. Redistributions in binary form must reproduce the above copyright
     49 *    notice, this list of conditions and the following disclaimer in the
     50 *    documentation and/or other materials provided with the distribution.
     51 *
     52 * 3. All modifications to the source code must be clearly marked as
     53 *    such.  Binary redistributions based on modified source code
     54 *    must be clearly marked as modified versions in the documentation
     55 *    and/or other materials provided with the distribution.
     56 *
     57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
     58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
     61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68 * SUCH DAMAGE.
     69 */
     70 
     71 #ifndef HASHMGR_HXX_
     72 #define HASHMGR_HXX_
     73 
     74 #include <stdio.h>
     75 #include <stdint.h>
     76 #include <memory>
     77 #include <string>
     78 #include <vector>
     79 
     80 #include "htypes.hxx"
     81 #include "filemgr.hxx"
     82 #include "w_char.hxx"
     83 
     84 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };  // type of HashMgr::flag_mode; presumably the flag encoding used by the aff/dic files -- confirm
     85 
     86 // The morphological description of a dictionary item can contain
     87 // an arbitrary number of "ph:" (MORPH_PHON) fields to store typical
     88 // phonetic or other misspellings of that word.
     89 // Ratio of lines with "ph:" to all lines in the dic file: 1/MORPH_PHON_RATIO
     90 #define MORPH_PHON_RATIO 500
     91 
     92 class HashMgr {  // owns a table of hentry records and declares word lookup/add/remove, flag decoding, and alias/REP table access
     93  int tablesize;  // presumably the number of slots in tableptr -- confirm in the implementation
     94  struct hentry** tableptr;  // table of hentry pointers; presumably the hash buckets -- confirm
     95  flag flag_mode;  // one of FLAG_CHAR / FLAG_LONG / FLAG_NUM / FLAG_UNI
     96  int complexprefixes;
     97  int utf8;
     98  unsigned short forbiddenword;
     99  int langnum;
    100  std::string enc;
    101  std::string lang;
    102  struct cs_info* csconv;
    103  std::string ignorechars;
    104  std::vector<w_char> ignorechars_utf16;  // w_char counterpart of ignorechars, judging by the name -- confirm
    105  int numaliasf;  // flag vector `compression' with aliases
    106  unsigned short** aliasf;
    107  unsigned short* aliasflen;
    108  int numaliasm;  // morphological description `compression' with aliases
    109  char** aliasm;
    110  // reptable is created from the REP table of the aff file and from "ph:" fields
    111  // of the dic file. It contains phonetic and other common misspellings
    112  // (letters, letter groups and words) for better suggestions.
    113  std::vector<replentry> reptable;
    114 
    115 public:
    116  HashMgr(const char* tpath, const char* apath, const char* key = NULL);  // key is optional and defaults to NULL
    117  ~HashMgr();
    118 
    119  struct hentry* lookup(const char*) const;
    120  int hash(const char*) const;
    121  struct hentry* walk_hashtable(int& col, struct hentry* hp) const;  // col is taken by non-const reference, so the callee may update it
    122 
    123  int add(const std::string& word);
    124  int add_with_affix(const std::string& word, const std::string& pattern);
    125  int remove(const std::string& word);
    126 private:
    127  // Only internal consumers are allowed to arena-allocate.
    128  int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const;
    129 public:
    130  int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {  // public overload: forwards to the private one with arena = false
    131    return decode_flags(result, flags, af, /* arena = */ false);
    132  }
    133  bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;  // overload that writes into a caller-supplied vector and returns bool
    134  unsigned short decode_flag(const char* flag) const;
    135  char* encode_flag(unsigned short flag) const;
    136  int is_aliasf() const;
    137  int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
    138  int is_aliasm() const;
    139  char* get_aliasm(int index) const;
    140  const std::vector<replentry>& get_reptable() const;  // returns a const reference, not a copy
    141 
    142 private:
    143  int get_clen_and_captype(const std::string& word, int* captype);
    144  int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);  // overload taking a caller-supplied w_char buffer; presumably scratch space -- confirm
    145  int load_tables(const char* tpath, const char* key);
    146  int add_word(const std::string& word,
    147               int wcl,
    148               unsigned short* ap,
    149               int al,
    150               const std::string* desc,
    151               bool onlyupcase,
    152               int captype);
    153  int load_config(const char* affpath, const char* key);
    154  bool parse_aliasf(const std::string& line, FileMgr* af);
    155  int add_hidden_capitalized_word(const std::string& word,
    156                                  int wcl,
    157                                  unsigned short* flags,
    158                                  int al,
    159                                  const std::string* dp,
    160                                  int captype);
    161  bool parse_aliasm(const std::string& line, FileMgr* af);
    162  bool parse_reptable(const std::string& line, FileMgr* af);
    163  int remove_forbidden_flag(const std::string& word);
    164 
    165  // Our Mozilla fork uses a simple arena allocator for certain strings which
    166  // persist for the lifetime of the HashMgr in order to avoid heap fragmentation.
    167  // It's a simple bump-allocator, so we can't actually free() memory midway
    168  // through the lifecycle, but we have a dummy free() implementation to ensure
    169  // that our calls to arena_alloc() and arena_free() are balanced.
    170  void* arena_alloc(int num_bytes);
    171  void* arena_alloc(int num_bytes) const {  // const overload: casts away constness and forwards to the non-const arena_alloc
    172    return const_cast<HashMgr*>(this)->arena_alloc(num_bytes);
    173  }
    174  void arena_free(void* ptr);
    175 
    176  std::vector<std::unique_ptr<uint8_t[]>> arena;  // byte buffers owned via unique_ptr, so they are released when this vector is destroyed
    177  int current_chunk_size = 0;  // starts at 0; presumably the size of the newest buffer in arena -- confirm
    178  int current_chunk_offset = 0;  // starts at 0; presumably the bump position inside that buffer -- confirm
    179  int outstanding_arena_allocations = 0;  // starts at 0; presumably the counter behind the alloc/free balance check -- confirm
    180 };
    181 
    182 #endif