tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

affixmgr.hxx (14700B)


      1 /* ***** BEGIN LICENSE BLOCK *****
      2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      3 *
      4 * Copyright (C) 2002-2022 Németh László
      5 *
      6 * The contents of this file are subject to the Mozilla Public License Version
      7 * 1.1 (the "License"); you may not use this file except in compliance with
      8 * the License. You may obtain a copy of the License at
      9 * http://www.mozilla.org/MPL/
     10 *
     11 * Software distributed under the License is distributed on an "AS IS" basis,
     12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     13 * for the specific language governing rights and limitations under the
     14 * License.
     15 *
     16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
     17 *
     18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
     19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
     20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
     21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
     22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
     23 *
     24 * Alternatively, the contents of this file may be used under the terms of
     25 * either the GNU General Public License Version 2 or later (the "GPL"), or
     26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     27 * in which case the provisions of the GPL or the LGPL are applicable instead
     28 * of those above. If you wish to allow use of your version of this file only
     29 * under the terms of either the GPL or the LGPL, and not to allow others to
     30 * use your version of this file under the terms of the MPL, indicate your
     31 * decision by deleting the provisions above and replace them with the notice
     32 * and other provisions required by the GPL or the LGPL. If you do not delete
     33 * the provisions above, a recipient may use your version of this file under
     34 * the terms of any one of the MPL, the GPL or the LGPL.
     35 *
     36 * ***** END LICENSE BLOCK ***** */
     37 /*
     38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
     39 * And Contributors.  All rights reserved.
     40 *
     41 * Redistribution and use in source and binary forms, with or without
     42 * modification, are permitted provided that the following conditions
     43 * are met:
     44 *
     45 * 1. Redistributions of source code must retain the above copyright
     46 *    notice, this list of conditions and the following disclaimer.
     47 *
     48 * 2. Redistributions in binary form must reproduce the above copyright
     49 *    notice, this list of conditions and the following disclaimer in the
     50 *    documentation and/or other materials provided with the distribution.
     51 *
     52 * 3. All modifications to the source code must be clearly marked as
     53 *    such.  Binary redistributions based on modified source code
     54 *    must be clearly marked as modified versions in the documentation
     55 *    and/or other materials provided with the distribution.
     56 *
     57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
     58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
     61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68 * SUCH DAMAGE.
     69 */
     70 
     71 #ifndef AFFIXMGR_HXX_
     72 #define AFFIXMGR_HXX_
     73 
     74 #include <stdio.h>
     75 
     76 #include <string>
     77 #include <vector>
     78 
     79 #include "atypes.hxx"
     80 #include "baseaffix.hxx"
     81 #include "hashmgr.hxx"
     82 #include "phonet.hxx"
     83 #include "replist.hxx"
     84 
     85 // Bit masks for checking flag duplication: dupSFX is bit 0, dupPFX is bit 1
     86 #define dupSFX (1 << 0)  // value 1; presumably marks a flag already seen on a suffix — confirm at the use site
     87 #define dupPFX (1 << 1)  // value 2; presumably marks a flag already seen on a prefix — confirm at the use site
     88 
     89 class PfxEntry;  // forward declaration: this header only uses pointers to PfxEntry
     90 class SfxEntry;  // forward declaration: this header only uses pointers to SfxEntry
     91 
     92 class AffixMgr {  // Affix manager: stores affix-file options, flags and tables, and declares the affix/compound checking API.
     93  PfxEntry* pStart[SETSIZE];  // NOTE(review): presumably prefix entries keyed by first byte — confirm in the .cxx
     94  SfxEntry* sStart[SETSIZE];  // NOTE(review): suffix counterpart of pStart — confirm
     95  PfxEntry* pFlag[SETSIZE];  // NOTE(review): presumably prefix entries keyed by affix flag — confirm
     96  SfxEntry* sFlag[SETSIZE];  // NOTE(review): suffix counterpart of pFlag — confirm
     97  const std::vector<HashMgr*>& alldic;  // non-owning reference; presumably bound to the constructor's ptr argument
     98  const HashMgr* pHMgr;
     99  std::string keystring;
    100  std::string trystring;
    101  std::string encoding;
    102  struct cs_info* csconv;
    103  int utf8;
    104  int complexprefixes;
    105  FLAG compoundflag;
    106  FLAG compoundbegin;
    107  FLAG compoundmiddle;
    108  FLAG compoundend;
    109  FLAG compoundroot;
    110  FLAG compoundforbidflag;
    111  FLAG compoundpermitflag;
    112  int compoundmoresuffixes;
    113  int checkcompounddup;
    114  int checkcompoundrep;
    115  int checkcompoundcase;
    116  int checkcompoundtriple;
    117  int simplifiedtriple;
    118  FLAG forbiddenword;
    119  FLAG nosuggest;
    120  FLAG nongramsuggest;
    121  FLAG needaffix;
    122  int cpdmin;
    123  RepList* iconvtable;
    124  RepList* oconvtable;
    125  bool parsedmaptable;  // presumably true once maptable has been parsed (same pattern for the parsed* flags below) — confirm
    126  std::vector<mapentry> maptable;
    127  bool parsedbreaktable;
    128  std::vector<std::string> breaktable;
    129  bool parsedcheckcpd;
    130  std::vector<patentry> checkcpdtable;
    131  int simplifiedcpd;
    132  bool parseddefcpd;
    133  std::vector<flagentry> defcpdtable;
    134  phonetable* phone;
    135  int maxngramsugs;
    136  int maxcpdsugs;
    137  int maxdiff;
    138  int onlymaxdiff;
    139  int nosplitsugs;
    140  int sugswithdots;
    141  int cpdwordmax;
    142  int cpdmaxsyllable;
    143  std::string cpdvowels; // vowels (for calculating the Hungarian compounding limit)
    144  std::vector<w_char> cpdvowels_utf16; // vowels for UTF-8 encoding
    145  std::string cpdsyllablenum; // syllable count incrementing flag
    146  const char* pfxappnd;  // BUG: not stateless
    147  const char* sfxappnd;  // BUG: not stateless
    148  int sfxextra;          // BUG: not stateless
    149  FLAG sfxflag;          // BUG: not stateless
    150  char* derived;         // BUG: not stateless
    151  SfxEntry* sfx;         // BUG: not stateless
    152  PfxEntry* pfx;         // BUG: not stateless
    153  int checknum;
    154  std::string wordchars; // letters + spec. word characters
    155  std::vector<w_char> wordchars_utf16;
    156  std::string ignorechars; // presumably the characters to be ignored — TODO confirm (was documented identically to wordchars)
    157  std::vector<w_char> ignorechars_utf16;
    158  std::string version;   // affix and dictionary file version string
    159  std::string lang;	 // language
    160  int langnum;
    161  FLAG lemma_present;
    162  FLAG circumfix;
    163  FLAG onlyincompound;
    164  FLAG keepcase;
    165  FLAG forceucase;
    166  FLAG warn;
    167  int forbidwarn;
    168  FLAG substandard;
    169  int checksharps;
    170  int fullstrip;
    171 
    172  int havecontclass;           // boolean variable
    173  char contclasses[CONTSIZE];  // flags of possible continuing classes (twofold
    174                               // affix)
    175 
    176 public:
    177  AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);  // key defaults to NULL; NOTE(review): affpath/key presumably go to parse_file() — confirm
    178  ~AffixMgr();  // NOTE(review): raw-pointer members but no copy operations declared — confirm instances are never copied
    179  struct hentry* affix_check(const char* word,
    180                             int len,
    181                             const unsigned short needflag = (unsigned short)0,  // NOTE(review): spelled unsigned short here but FLAG/FLAG_NULL in sibling declarations — presumably the same type
    182                             char in_compound = IN_CPD_NOT);
    183  struct hentry* prefix_check(const char* word,
    184                              int len,
    185                              char in_compound,
    186                              const FLAG needflag = FLAG_NULL);
    187  inline int isSubset(const char* s1, const char* s2);  // NOTE(review): declared inline, but no definition is visible in this header
    188  struct hentry* prefix_check_twosfx(const char* word,
    189                                     int len,
    190                                     char in_compound,
    191                                     const FLAG needflag = FLAG_NULL);
    192  inline int isRevSubset(const char* s1, const char* end_of_s2, int len);  // NOTE(review): declared inline, but no definition is visible in this header
    193  struct hentry* suffix_check(const char* word,
    194                              int len,
    195                              int sfxopts,
    196                              PfxEntry* ppfx,
    197                              const FLAG cclass = FLAG_NULL,
    198                              const FLAG needflag = FLAG_NULL,
    199                              char in_compound = IN_CPD_NOT);
    200  struct hentry* suffix_check_twosfx(const char* word,
    201                                     int len,
    202                                     int sfxopts,
    203                                     PfxEntry* ppfx,
    204                                     const FLAG needflag = FLAG_NULL);
    205 
    206  std::string affix_check_morph(const char* word,  // the *_morph variants return std::string where the checks above return hentry*
    207                                int len,
    208                                const FLAG needflag = FLAG_NULL,
    209                                char in_compound = IN_CPD_NOT);
    210  std::string prefix_check_morph(const char* word,
    211                                 int len,
    212                                 char in_compound,
    213                                 const FLAG needflag = FLAG_NULL);
    214  std::string suffix_check_morph(const char* word,
    215                                 int len,
    216                                 int sfxopts,
    217                                 PfxEntry* ppfx,
    218                                 const FLAG cclass = FLAG_NULL,
    219                                 const FLAG needflag = FLAG_NULL,
    220                                 char in_compound = IN_CPD_NOT);
    221 
    222  std::string prefix_check_twosfx_morph(const char* word,
    223                                        int len,
    224                                        char in_compound,
    225                                        const FLAG needflag = FLAG_NULL);
    226  std::string suffix_check_twosfx_morph(const char* word,
    227                                        int len,
    228                                        int sfxopts,
    229                                        PfxEntry* ppfx,
    230                                        const FLAG needflag = FLAG_NULL);
    231 
    232  std::string morphgen(const char* ts,
    233                       int wl,
    234                       const unsigned short* ap,
    235                       unsigned short al,
    236                       const char* morph,
    237                       const char* targetmorph,
    238                       int level);
    239 
    240  int expand_rootword(struct guessword* wlst,
    241                      int maxn,
    242                      const char* ts,
    243                      int wl,
    244                      const unsigned short* ap,
    245                      unsigned short al,
    246                      const char* bad,
    247                      int,  // unnamed parameter — see the definition for its meaning
    248                      const char*);  // unnamed parameter — see the definition for its meaning
    249 
    250  short get_syllable(const std::string& word);
    251  int cpdrep_check(const char* word, int len);
    252  int cpdwordpair_check(const char * word, int len);
    253  int cpdpat_check(const char* word,
    254                   int len,
    255                   hentry* r1,
    256                   hentry* r2,
    257                   const char affixed);
    258  int defcpd_check(hentry*** words,
    259                   short wnum,
    260                   hentry* rv,
    261                   hentry** rwords,
    262                   char all);
    263  int cpdcase_check(const char* word, int len);
    264  inline int candidate_check(const char* word, int len);  // NOTE(review): declared inline, but no definition is visible in this header
    265  void setcminmax(int* cmin, int* cmax, const char* word, int len);  // presumably writes its results through cmin and cmax — confirm
    266  struct hentry* compound_check(const std::string& word,
    267                                short wordnum,
    268                                short numsyllable,
    269                                short maxwordnum,
    270                                short wnum,
    271                                hentry** words,
    272                                hentry** rwords,
    273                                char hu_mov_rule,
    274                                char is_sug,
    275                                int* info);
    276 
    277  int compound_check_morph(const char* word,
    278                           int len,
    279                           short wordnum,
    280                           short numsyllable,
    281                           short maxwordnum,
    282                           short wnum,
    283                           hentry** words,
    284                           hentry** rwords,
    285                           char hu_mov_rule,
    286                           std::string& result,  // non-const reference: presumably the output string — confirm
    287                           const std::string* partresult);
    288 
    289  std::vector<std::string> get_suffix_words(short unsigned* suff,
    290                       int len,
    291                       const char* root_word);
    292 
    293  struct hentry* lookup(const char* word);
    294  const std::vector<replentry>& get_reptable() const;
    295  RepList* get_iconvtable() const;
    296  RepList* get_oconvtable() const;
    297  struct phonetable* get_phonetable() const;
    298  const std::vector<mapentry>& get_maptable() const;
    299  const std::vector<std::string>& get_breaktable() const;
    300  const std::string& get_encoding();  // NOTE(review): not const-qualified, unlike most getters here
    301  int get_langnum() const;
    302  char* get_key_string();  // NOTE(review): non-const and returns mutable char*; ownership is not visible here
    303  char* get_try_string() const;  // NOTE(review): returns mutable char* from a const method; ownership is not visible here
    304  const std::string& get_wordchars() const;
    305  const std::vector<w_char>& get_wordchars_utf16() const;
    306  const char* get_ignore() const;
    307  const std::vector<w_char>& get_ignore_utf16() const;
    308  int get_compound() const;
    309  FLAG get_compoundflag() const;
    310  FLAG get_forbiddenword() const;
    311  FLAG get_nosuggest() const;
    312  FLAG get_nongramsuggest() const;
    313  FLAG get_substandard() const;
    314  FLAG get_needaffix() const;
    315  FLAG get_onlyincompound() const;
    316  const char* get_derived() const;
    317  const std::string& get_version() const;
    318  int have_contclass() const;
    319  int get_utf8() const;
    320  int get_complexprefixes() const;
    321  char* get_suffixed(char) const;  // unnamed char parameter — see the definition for its meaning
    322  int get_maxngramsugs() const;
    323  int get_maxcpdsugs() const;
    324  int get_maxdiff() const;
    325  int get_onlymaxdiff() const;
    326  int get_nosplitsugs() const;
    327  int get_sugswithdots(void) const;
    328  FLAG get_keepcase(void) const;
    329  FLAG get_forceucase(void) const;
    330  FLAG get_warn(void) const;
    331  int get_forbidwarn(void) const;
    332  int get_checksharps(void) const;
    333  char* encode_flag(unsigned short aflag) const;  // NOTE(review): returns char*; ownership is not visible here — check the definition before freeing
    334  int get_fullstrip() const;
    335 
    336 private:  // parsing helpers and affix tree/list builders
    337  int parse_file(const char* affpath, const char* key);
    338  bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);  // presumably stores the result in *out; bool presumably signals success — confirm
    339  bool parse_num(const std::string& line, int* out, FileMgr* af);  // presumably stores the result in *out — confirm
    340  bool parse_cpdsyllable(const std::string& line, FileMgr* af);
    341  bool parse_convtable(const std::string& line,
    342                      FileMgr* af,
    343                      RepList** rl,
    344                      const std::string& keyword);
    345  bool parse_phonetable(const std::string& line, FileMgr* af);
    346  bool parse_maptable(const std::string& line, FileMgr* af);
    347  bool parse_breaktable(const std::string& line, FileMgr* af);
    348  bool parse_checkcpdtable(const std::string& line, FileMgr* af);
    349  bool parse_defcpdtable(const std::string& line, FileMgr* af);
    350  bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);  // dupflags: presumably the array the dupSFX/dupPFX bits are stored in — confirm
    351 
    352  void reverse_condition(std::string&);  // takes the string by non-const reference, so presumably modifies it in place — confirm
    353  std::string& debugflag(std::string& result, unsigned short flag);
    354  int condlen(const char*);
    355  int encodeit(AffEntry& entry, const char* cs);
    356  int build_pfxtree(PfxEntry* pfxptr);
    357  int build_sfxtree(SfxEntry* sfxptr);
    358  int process_pfx_order();
    359  int process_sfx_order();
    360  PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
    361  SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
    362  int process_pfx_tree_to_list();
    363  int process_sfx_tree_to_list();
    364  int redundant_condition(char, const char* strip, int stripl, const char* cond, int);  // first and last parameters are unnamed — see the definition
    365  void finishFileMgr(FileMgr* afflst);
    366 };
    367 
    368 #endif