[ tor-browser ].git.dasho

hunspell.cxx (66656B)
      1 /* ***** BEGIN LICENSE BLOCK *****
      2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      3 *
      4 * Copyright (C) 2002-2022 Németh László
      5 *
      6 * The contents of this file are subject to the Mozilla Public License Version
      7 * 1.1 (the "License"); you may not use this file except in compliance with
      8 * the License. You may obtain a copy of the License at
      9 * http://www.mozilla.org/MPL/
     10 *
     11 * Software distributed under the License is distributed on an "AS IS" basis,
     12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     13 * for the specific language governing rights and limitations under the
     14 * License.
     15 *
     16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
     17 *
     18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
     19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
     20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
     21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
     22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
     23 *
     24 * Alternatively, the contents of this file may be used under the terms of
     25 * either the GNU General Public License Version 2 or later (the "GPL"), or
     26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     27 * in which case the provisions of the GPL or the LGPL are applicable instead
     28 * of those above. If you wish to allow use of your version of this file only
     29 * under the terms of either the GPL or the LGPL, and not to allow others to
     30 * use your version of this file under the terms of the MPL, indicate your
     31 * decision by deleting the provisions above and replace them with the notice
     32 * and other provisions required by the GPL or the LGPL. If you do not delete
     33 * the provisions above, a recipient may use your version of this file under
     34 * the terms of any one of the MPL, the GPL or the LGPL.
     35 *
     36 * ***** END LICENSE BLOCK ***** */
     37 /*
     38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
     39 * And Contributors.  All rights reserved.
     40 *
     41 * Redistribution and use in source and binary forms, with or without
     42 * modification, are permitted provided that the following conditions
     43 * are met:
     44 *
     45 * 1. Redistributions of source code must retain the above copyright
     46 *    notice, this list of conditions and the following disclaimer.
     47 *
     48 * 2. Redistributions in binary form must reproduce the above copyright
     49 *    notice, this list of conditions and the following disclaimer in the
     50 *    documentation and/or other materials provided with the distribution.
     51 *
     52 * 3. All modifications to the source code must be clearly marked as
     53 *    such.  Binary redistributions based on modified source code
     54 *    must be clearly marked as modified versions in the documentation
     55 *    and/or other materials provided with the distribution.
     56 *
     57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
     58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
     61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68 * SUCH DAMAGE.
     69 */
     70 
     71 #include <stdlib.h>
     72 #include <string.h>
     73 #include <stdio.h>
     74 #include <time.h>
     75 
     76 #include "affixmgr.hxx"
     77 #include "hunspell.hxx"
     78 #include "suggestmgr.hxx"
     79 #include "hunspell.h"
     80 #include "csutil.hxx"
     81 
     82 #include <limits>
     83 #include <string>
     84 
     85 #define MAXWORDUTF8LEN (MAXWORDLEN * 3)
     86 
     87 class HunspellImpl
     88 {
     89 public:
     90  HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
     91  ~HunspellImpl();
     92  int add_dic(const char* dpath, const char* key = NULL);
     93  std::vector<std::string> suffix_suggest(const std::string& root_word);
     94  std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
     95  std::vector<std::string> generate(const std::string& word, const std::string& pattern);
     96  std::vector<std::string> stem(const std::string& word);
     97  std::vector<std::string> stem(const std::vector<std::string>& morph);
     98  std::vector<std::string> analyze(const std::string& word);
     99  int get_langnum() const;
    100  bool input_conv(const std::string& word, std::string& dest);
    101  bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
    102  std::vector<std::string> suggest(const std::string& word);
    103  const std::string& get_wordchars_cpp() const;
    104  const std::vector<w_char>& get_wordchars_utf16() const;
    105  const std::string& get_dict_encoding() const;
    106  int add(const std::string& word);
    107  int add_with_affix(const std::string& word, const std::string& example);
    108  int remove(const std::string& word);
    109  const std::string& get_version_cpp() const;
    110  struct cs_info* get_csconv();
    111 
    112  int spell(const char* word, int* info = NULL, char** root = NULL);
    113  int suggest(char*** slst, const char* word);
    114  int suffix_suggest(char*** slst, const char* root_word);
    115  void free_list(char*** slst, int n);
    116  char* get_dic_encoding();
    117  int analyze(char*** slst, const char* word);
    118  int stem(char*** slst, const char* word);
    119  int stem(char*** slst, char** morph, int n);
    120  int generate(char*** slst, const char* word, const char* word2);
    121  int generate(char*** slst, const char* word, char** desc, int n);
    122  const char* get_wordchars() const;
    123  const char* get_version() const;
    124  int input_conv(const char* word, char* dest, size_t destsize);
    125 
    126 private:
    127  AffixMgr* pAMgr;
    128  std::vector<HashMgr*> m_HMgrs;
    129  SuggestMgr* pSMgr;
    130  char* affixpath;
    131  std::string encoding;
    132  struct cs_info* csconv;
    133  int langnum;
    134  int utf8;
    135  int complexprefixes;
    136  std::vector<std::string> wordbreak;
    137 
    138 private:
    139  std::vector<std::string> analyze_internal(const std::string& word);
    140  bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL);
    141  std::vector<std::string> suggest_internal(const std::string& word,
    142                    bool& capitalized, size_t& abbreviated, int& captype);
    143  void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
    144  size_t cleanword2(std::string& dest,
    145                    std::vector<w_char>& dest_u,
    146                    const std::string& src,
    147                    int* pcaptype,
    148                    size_t* pabbrev);
    149  void clean_ignore(std::string& dest, const std::string& src);
    150  void mkinitcap(std::string& u8);
    151  int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
    152  int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
    153  void mkallcap(std::string& u8);
    154  int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
    155  struct hentry* checkword(const std::string& source, int* info, std::string* root);
    156  std::string sharps_u8_l1(const std::string& source);
    157  hentry*
    158  spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
    159  int is_keepcase(const hentry* rv);
    160  void insert_sug(std::vector<std::string>& slst, const std::string& word);
    161  void cat_result(std::string& result, const std::string& st);
    162  std::vector<std::string> spellml(const std::string& word);
    163  std::string get_xml_par(const std::string& par, std::string::size_type pos);
    164  std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
    165  std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
    166  int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
    167 private:
    168  HunspellImpl(const HunspellImpl&);
    169  HunspellImpl& operator=(const HunspellImpl&);
    170 };
    171 
    172 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
    173  csconv = NULL;
    174  utf8 = 0;
    175  complexprefixes = 0;
    176  affixpath = mystrdup(affpath);
    177 
    178  /* first set up the hash manager */
    179  m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
    180 
    181  /* next set up the affix manager */
    182  /* it needs access to the hash manager lookup methods */
    183  pAMgr = new AffixMgr(affpath, m_HMgrs, key);
    184 
    185  /* get the preferred try string and the dictionary */
    186  /* encoding from the Affix Manager for that dictionary */
    187  char* try_string = pAMgr->get_try_string();
    188  encoding = pAMgr->get_encoding();
    189  langnum = pAMgr->get_langnum();
    190  utf8 = pAMgr->get_utf8();
    191  if (!utf8)
    192    csconv = get_current_cs(encoding);
    193  complexprefixes = pAMgr->get_complexprefixes();
    194  wordbreak = pAMgr->get_breaktable();
    195 
    196  /* and finally set up the suggestion manager */
    197  pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
    198  if (try_string)
    199    free(try_string);
    200 }
    201 
    202 HunspellImpl::~HunspellImpl() {
    203  delete pSMgr;
    204  delete pAMgr;
    205  for (size_t i = 0; i < m_HMgrs.size(); ++i)
    206    delete m_HMgrs[i];
    207  pSMgr = NULL;
    208  pAMgr = NULL;
    209 #ifdef MOZILLA_CLIENT
    210  delete[] csconv;
    211 #endif
    212  csconv = NULL;
    213  if (affixpath)
    214    free(affixpath);
    215  affixpath = NULL;
    216 }
    217 
    218 // load extra dictionaries
    219 int HunspellImpl::add_dic(const char* dpath, const char* key) {
    220  if (!affixpath)
    221    return 1;
    222  m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
    223  return 0;
    224 }
    225 
    226 
    227 // make a copy of src at dest while removing all characters
    228 // specified in IGNORE rule
    229 void HunspellImpl::clean_ignore(std::string& dest,
    230                                const std::string& src) {
    231  dest.clear();
    232  dest.assign(src);
    233  const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
    234  if (ignoredchars != NULL) {
    235    if (utf8) {
    236      const std::vector<w_char>& ignoredchars_utf16 =
    237          pAMgr->get_ignore_utf16();
    238      remove_ignored_chars_utf(dest, ignoredchars_utf16);
    239    } else {
    240      remove_ignored_chars(dest, ignoredchars);
    241    }
    242  }
    243 }
    244 
    245 
    246 // make a copy of src at destination while removing all leading
    247 // blanks and removing any trailing periods after recording
    248 // their presence with the abbreviation flag
    249 // also since already going through character by character,
    250 // set the capitalization type
    251 // return the length of the "cleaned" (and UTF-8 encoded) word
    252 
    253 size_t HunspellImpl::cleanword2(std::string& dest,
    254                         std::vector<w_char>& dest_utf,
    255                         const std::string& src,
    256                         int* pcaptype,
    257                         size_t* pabbrev) {
    258  dest.clear();
    259  dest_utf.clear();
    260 
    261  // remove IGNORE characters from the string
    262  std::string w2;
    263  clean_ignore(w2, src);
    264 
    265  const char* q = w2.c_str();
    266 
    267  // first skip over any leading blanks
    268  while (*q == ' ')
    269    ++q;
    270 
    271  // now strip off any trailing periods (recording their presence)
    272  *pabbrev = 0;
    273  int nl = strlen(q);
    274  while ((nl > 0) && (*(q + nl - 1) == '.')) {
    275    nl--;
    276    (*pabbrev)++;
    277  }
    278 
    279  // if no characters are left it can't be capitalized
    280  if (nl <= 0) {
    281    *pcaptype = NOCAP;
    282    return 0;
    283  }
    284 
    285  dest.append(q, nl);
    286  nl = dest.size();
    287  if (utf8) {
    288    u8_u16(dest_utf, dest);
    289    *pcaptype = get_captype_utf8(dest_utf, langnum);
    290  } else {
    291    *pcaptype = get_captype(dest, csconv);
    292  }
    293  return nl;
    294 }
    295 
    296 void HunspellImpl::cleanword(std::string& dest,
    297                        const std::string& src,
    298                        int* pcaptype,
    299                        int* pabbrev) {
    300  dest.clear();
    301  const unsigned char* q = (const unsigned char*)src.c_str();
    302  int firstcap = 0;
    303 
    304  // first skip over any leading blanks
    305  while (*q == ' ')
    306    ++q;
    307 
    308  // now strip off any trailing periods (recording their presence)
    309  *pabbrev = 0;
    310  int nl = strlen((const char*)q);
    311  while ((nl > 0) && (*(q + nl - 1) == '.')) {
    312    nl--;
    313    (*pabbrev)++;
    314  }
    315 
    316  // if no characters are left it can't be capitalized
    317  if (nl <= 0) {
    318    *pcaptype = NOCAP;
    319    return;
    320  }
    321 
    322  // now determine the capitalization type of the first nl letters
    323  int ncap = 0;
    324  int nneutral = 0;
    325  int nc = 0;
    326 
    327  if (!utf8) {
    328    while (nl > 0) {
    329      nc++;
    330      if (csconv[(*q)].ccase)
    331        ncap++;
    332      if (csconv[(*q)].cupper == csconv[(*q)].clower)
    333        nneutral++;
    334      dest.push_back(*q++);
    335      nl--;
    336    }
    337    // remember to terminate the destination string
    338    firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
    339  } else {
    340    std::vector<w_char> t;
    341    u8_u16(t, src);
    342    for (size_t i = 0; i < t.size(); ++i) {
    343      unsigned short idx = (t[i].h << 8) + t[i].l;
    344      unsigned short low = unicodetolower(idx, langnum);
    345      if (idx != low)
    346        ncap++;
    347      if (unicodetoupper(idx, langnum) == low)
    348        nneutral++;
    349    }
    350    u16_u8(dest, t);
    351    if (ncap) {
    352      unsigned short idx = (t[0].h << 8) + t[0].l;
    353      firstcap = (idx != unicodetolower(idx, langnum));
    354    }
    355  }
    356 
    357  // now finally set the captype
    358  if (ncap == 0) {
    359    *pcaptype = NOCAP;
    360  } else if ((ncap == 1) && firstcap) {
    361    *pcaptype = INITCAP;
    362  } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
    363    *pcaptype = ALLCAP;
    364  } else if ((ncap > 1) && firstcap) {
    365    *pcaptype = HUHINITCAP;
    366  } else {
    367    *pcaptype = HUHCAP;
    368  }
    369 }
    370 
    371 void HunspellImpl::mkallcap(std::string& u8) {
    372  if (utf8) {
    373    std::vector<w_char> u16;
    374    u8_u16(u16, u8);
    375    ::mkallcap_utf(u16, langnum);
    376    u16_u8(u8, u16);
    377  } else {
    378    ::mkallcap(u8, csconv);
    379  }
    380 }
    381 
    382 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
    383  if (utf8) {
    384    ::mkallsmall_utf(u16, langnum);
    385    u16_u8(u8, u16);
    386  } else {
    387    ::mkallsmall(u8, csconv);
    388  }
    389  return u8.size();
    390 }
    391 
    392 // convert UTF-8 sharp S codes to latin 1
    393 std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
    394  std::string dest(source);
    395  mystrrep(dest, "\xC3\x9F", "\xDF");
    396  return dest;
    397 }
    398 
    399 // recursive search for right ss - sharp s permutations
    400 hentry* HunspellImpl::spellsharps(std::string& base,
    401                              size_t n_pos,
    402                              int n,
    403                              int repnum,
    404                              int* info,
    405                              std::string* root) {
    406  size_t pos = base.find("ss", n_pos);
    407  if (pos != std::string::npos && (n < MAXSHARPS)) {
    408    base[pos] = '\xC3';
    409    base[pos + 1] = '\x9F';
    410    hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
    411    if (h)
    412      return h;
    413    base[pos] = 's';
    414    base[pos + 1] = 's';
    415    h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
    416    if (h)
    417      return h;
    418  } else if (repnum > 0) {
    419    if (utf8)
    420      return checkword(base, info, root);
    421    std::string tmp(sharps_u8_l1(base));
    422    return checkword(tmp, info, root);
    423  }
    424  return NULL;
    425 }
    426 
    427 int HunspellImpl::is_keepcase(const hentry* rv) {
    428  return pAMgr && rv->astr && pAMgr->get_keepcase() &&
    429         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
    430 }
    431 
    432 /* insert a word to the beginning of the suggestion array */
    433 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
    434  slst.insert(slst.begin(), word);
    435 }
    436 
    437 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
    438  bool r = spell_internal(word, info, root);
    439  if (r && root) {
    440    // output conversion
    441    RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
    442    if (rl) {
    443      std::string wspace;
    444      if (rl->conv(*root, wspace)) {
    445        *root = wspace;
    446      }
    447    }
    448  }
    449  return r;
    450 }
    451 
    452 bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) {
    453  struct hentry* rv = NULL;
    454 
    455  int info2 = 0;
    456  if (!info)
    457    info = &info2;
    458  else
    459    *info = 0;
    460 
    461  // Hunspell supports XML input of the simplified API (see manual)
    462  if (word == SPELL_XML)
    463    return true;
    464  if (utf8) {
    465    if (word.size() >= MAXWORDUTF8LEN)
    466      return false;
    467  } else {
    468    if (word.size() >= MAXWORDLEN)
    469      return false;
    470  }
    471  int captype = NOCAP;
    472  size_t abbv = 0;
    473  size_t wl = 0;
    474 
    475  std::string scw;
    476  std::vector<w_char> sunicw;
    477 
    478  // input conversion
    479  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
    480  {
    481    std::string wspace;
    482 
    483    bool convstatus = rl ? rl->conv(word, wspace) : false;
    484    if (convstatus)
    485      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
    486    else
    487      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
    488  }
    489 
    490 #ifdef MOZILLA_CLIENT
    491  // accept the abbreviated words without dots
    492  // workaround for the incomplete tokenization of Mozilla
    493  abbv = 1;
    494 #endif
    495 
    496  if (wl == 0 || m_HMgrs.empty())
    497    return true;
    498  if (root)
    499    root->clear();
    500 
    501  // allow numbers with dots, dashes and commas (but forbid double separators:
    502  // "..", "--" etc.)
    503  enum { NBEGIN, NNUM, NSEP };
    504  int nstate = NBEGIN;
    505  size_t i;
    506 
    507  for (i = 0; (i < wl); i++) {
    508    if ((scw[i] <= '9') && (scw[i] >= '0')) {
    509      nstate = NNUM;
    510    } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
    511      if ((nstate == NSEP) || (i == 0))
    512        break;
    513      nstate = NSEP;
    514    } else
    515      break;
    516  }
    517  if ((i == wl) && (nstate == NNUM))
    518    return true;
    519 
    520  switch (captype) {
    521    case HUHCAP:
    522    /* FALLTHROUGH */
    523    case HUHINITCAP:
    524      *info |= SPELL_ORIGCAP;
    525    /* FALLTHROUGH */
    526    case NOCAP:
    527      rv = checkword(scw, info, root);
    528      if ((abbv) && !(rv)) {
    529        std::string u8buffer(scw);
    530        u8buffer.push_back('.');
    531        rv = checkword(u8buffer, info, root);
    532      }
    533      break;
    534    case ALLCAP: {
    535      *info |= SPELL_ORIGCAP;
    536      rv = checkword(scw, info, root);
    537      if (rv)
    538        break;
    539      if (abbv) {
    540        std::string u8buffer(scw);
    541        u8buffer.push_back('.');
    542        rv = checkword(u8buffer, info, root);
    543        if (rv)
    544          break;
    545      }
    546      // Spec. prefix handling for Catalan, French, Italian:
    547      // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
    548      size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
    549      if (apos != std::string::npos) {
    550        mkallsmall2(scw, sunicw);
    551        //conversion may result in string with different len to pre-mkallsmall2
    552        //so re-scan
    553        if (apos != std::string::npos && apos < scw.size() - 1) {
    554          std::string part1 = scw.substr(0, apos+1);
    555          std::string part2 = scw.substr(apos+1);
    556          if (utf8) {
    557            std::vector<w_char> part1u, part2u;
    558            u8_u16(part1u, part1);
    559            u8_u16(part2u, part2);
    560            mkinitcap2(part2, part2u);
    561            scw = part1 + part2;
    562            sunicw = part1u;
    563            sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
    564            rv = checkword(scw, info, root);
    565            if (rv)
    566              break;
    567          } else {
    568            mkinitcap2(part2, sunicw);
    569            scw = part1 + part2;
    570            rv = checkword(scw, info, root);
    571            if (rv)
    572              break;
    573          }
    574          mkinitcap2(scw, sunicw);
    575          rv = checkword(scw, info, root);
    576          if (rv)
    577            break;
    578        }
    579      }
    580      if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
    581 
    582        mkallsmall2(scw, sunicw);
    583        std::string u8buffer(scw);
    584        rv = spellsharps(u8buffer, 0, 0, 0, info, root);
    585        if (!rv) {
    586          mkinitcap2(scw, sunicw);
    587          rv = spellsharps(scw, 0, 0, 0, info, root);
    588        }
    589        if ((abbv) && !(rv)) {
    590          u8buffer.push_back('.');
    591          rv = spellsharps(u8buffer, 0, 0, 0, info, root);
    592          if (!rv) {
    593            u8buffer = std::string(scw);
    594            u8buffer.push_back('.');
    595            rv = spellsharps(u8buffer, 0, 0, 0, info, root);
    596          }
    597        }
    598        if (rv)
    599          break;
    600      }
    601    }
    602      /* FALLTHROUGH */
    603    case INITCAP: {
    604      // handle special capitalization of dotted I
    605      bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
    606      *info |= SPELL_ORIGCAP;
    607      if (captype == ALLCAP) {
    608          mkallsmall2(scw, sunicw);
    609          mkinitcap2(scw, sunicw);
    610          if (Idot)
    611             scw.replace(0, 1, "\xc4\xb0");
    612      }
    613      if (captype == INITCAP)
    614        *info |= SPELL_INITCAP;
    615      rv = checkword(scw, info, root);
    616      if (captype == INITCAP)
    617        *info &= ~SPELL_INITCAP;
    618      // forbid bad capitalization
    619      // (for example, ijs -> Ijs instead of IJs in Dutch)
    620      // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
    621      if (*info & SPELL_FORBIDDEN) {
    622        rv = NULL;
    623        break;
    624      }
    625      if (rv && is_keepcase(rv) && (captype == ALLCAP))
    626        rv = NULL;
    627      if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
    628        break;
    629 
    630      mkallsmall2(scw, sunicw);
    631      std::string u8buffer(scw);
    632      mkinitcap2(scw, sunicw);
    633 
    634      rv = checkword(u8buffer, info, root);
    635      if (abbv && !rv) {
    636        u8buffer.push_back('.');
    637        rv = checkword(u8buffer, info, root);
    638        if (!rv) {
    639          u8buffer = scw;
    640          u8buffer.push_back('.');
    641          if (captype == INITCAP)
    642            *info |= SPELL_INITCAP;
    643          rv = checkword(u8buffer, info, root);
    644          if (captype == INITCAP)
    645            *info &= ~SPELL_INITCAP;
    646          if (rv && is_keepcase(rv) && (captype == ALLCAP))
    647            rv = NULL;
    648          break;
    649        }
    650      }
    651      if (rv && is_keepcase(rv) &&
    652          ((captype == ALLCAP) ||
    653           // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
    654           // in INITCAP form, too.
    655           !(pAMgr->get_checksharps() &&
    656             ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
    657              (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
    658        rv = NULL;
    659      break;
    660    }
    661  }
    662 
    663  if (rv) {
    664    if (pAMgr && pAMgr->get_warn() && rv->astr &&
    665        TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
    666      *info |= SPELL_WARN;
    667      if (pAMgr->get_forbidwarn())
    668        return false;
    669      return true;
    670    }
    671    return true;
    672  }
    673 
    674  // recursive breaking at break points
    675  if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
    676 
    677    int nbr = 0;
    678    wl = scw.size();
    679 
    680    // calculate break points for recursion limit
    681    for (size_t j = 0; j < wordbreak.size(); ++j) {
    682      size_t pos = 0;
    683      while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
    684        ++nbr;
    685        pos += wordbreak[j].size();
    686      }
    687    }
    688    if (nbr >= 10)
    689      return false;
    690 
    691    // check boundary patterns (^begin and end$)
    692    for (size_t j = 0; j < wordbreak.size(); ++j) {
    693      size_t plen = wordbreak[j].size();
    694      if (plen == 1 || plen > wl)
    695        continue;
    696 
    697      if (wordbreak[j][0] == '^' &&
    698          scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
    699        return true;
    700 
    701      if (wordbreak[j][plen - 1] == '$' &&
    702          scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
    703        std::string suffix(scw.substr(wl - plen + 1));
    704        scw.resize(wl - plen + 1);
    705        if (spell(scw))
    706          return true;
    707        scw.append(suffix);
    708      }
    709    }
    710 
    711    // other patterns
    712    for (size_t j = 0; j < wordbreak.size(); ++j) {
    713      size_t plen = wordbreak[j].size();
    714      size_t found = scw.find(wordbreak[j]);
    715      if ((found > 0) && (found < wl - plen)) {
    716        size_t found2 = scw.find(wordbreak[j], found + 1);
    717        // try to break at the second occurance
    718        // to recognize dictionary words with wordbreak
    719        if (found2 > 0 && (found2 < wl - plen))
    720            found = found2;
    721        if (!spell(scw.substr(found + plen)))
    722          continue;
    723        std::string suffix(scw.substr(found));
    724        scw.resize(found);
    725        // examine 2 sides of the break point
    726        if (spell(scw))
    727          return true;
    728        scw.append(suffix);
    729 
    730        // LANG_hu: spec. dash rule
    731        if (langnum == LANG_hu && wordbreak[j] == "-") {
    732          suffix = scw.substr(found + 1);
    733          scw.resize(found + 1);
    734          if (spell(scw))
    735            return true;  // check the first part with dash
    736          scw.append(suffix);
    737        }
    738        // end of LANG specific region
    739      }
    740    }
    741 
    742    // other patterns (break at first break point)
    743    for (size_t j = 0; j < wordbreak.size(); ++j) {
    744      size_t plen = wordbreak[j].size();
    745      size_t found = scw.find(wordbreak[j]);
    746      if ((found > 0) && (found < wl - plen)) {
    747        if (!spell(scw.substr(found + plen)))
    748          continue;
    749        std::string suffix(scw.substr(found));
    750        scw.resize(found);
    751        // examine 2 sides of the break point
    752        if (spell(scw))
    753          return true;
    754        scw.append(suffix);
    755 
    756        // LANG_hu: spec. dash rule
    757        if (langnum == LANG_hu && wordbreak[j] == "-") {
    758          suffix = scw.substr(found + 1);
    759          scw.resize(found + 1);
    760          if (spell(scw))
    761            return true;  // check the first part with dash
    762          scw.append(suffix);
    763        }
    764        // end of LANG specific region
    765      }
    766    }
    767  }
    768 
    769  return false;
    770 }
    771 
    772 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
    773  std::string w2;
    774  const char* word;
    775  int len;
    776 
    777  // remove IGNORE characters from the string
    778  clean_ignore(w2, w);
    779 
    780  word = w2.c_str();
    781  len = w2.size();
    782 
    783  if (!len)
    784    return NULL;
    785 
    786  // word reversing wrapper for complex prefixes
    787  if (complexprefixes) {
    788    if (utf8)
    789      reverseword_utf(w2);
    790    else
    791      reverseword(w2);
    792  }
    793 
    794  word = w2.c_str();
    795 
    796  // look word in hash table
    797  struct hentry* he = NULL;
    798  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
    799    he = m_HMgrs[i]->lookup(word);
    800 
    801    // check forbidden and onlyincompound words
    802    if ((he) && (he->astr) && (pAMgr) &&
    803        TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
    804      if (info)
    805        *info |= SPELL_FORBIDDEN;
    806      // LANG_hu section: set dash information for suggestions
    807      if (langnum == LANG_hu) {
    808        if (pAMgr->get_compoundflag() &&
    809            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
    810          if (info)
    811            *info |= SPELL_COMPOUND;
    812        }
    813      }
    814      return NULL;
    815    }
    816 
    817    // he = next not needaffix, onlyincompound homonym or onlyupcase word
    818    while (he && (he->astr) && pAMgr &&
    819           ((pAMgr->get_needaffix() &&
    820             TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
    821            (pAMgr->get_onlyincompound() &&
    822             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
    823            (info && (*info & SPELL_INITCAP) &&
    824             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
    825      he = he->next_homonym;
    826  }
    827 
    828  // check with affixes
    829  if (!he && pAMgr) {
    830    // try stripping off affixes */
    831    he = pAMgr->affix_check(word, len, 0);
    832 
    833    // check compound restriction and onlyupcase
    834    if (he && he->astr &&
    835        ((pAMgr->get_onlyincompound() &&
    836          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
    837         (info && (*info & SPELL_INITCAP) &&
    838          TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
    839      he = NULL;
    840    }
    841 
    842    if (he) {
    843      if ((he->astr) && (pAMgr) &&
    844          TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
    845        if (info)
    846          *info |= SPELL_FORBIDDEN;
    847        return NULL;
    848      }
    849      if (root) {
    850        root->assign(he->word);
    851        if (complexprefixes) {
    852          if (utf8)
    853            reverseword_utf(*root);
    854          else
    855            reverseword(*root);
    856        }
    857      }
    858      // try check compound word
    859    } else if (pAMgr->get_compound()) {
    860      struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
    861      he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
    862      // LANG_hu section: `moving rule' with last dash
    863      if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
    864        std::string dup(word, len - 1);
    865        he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
    866      }
    867      // end of LANG specific region
    868      if (he) {
    869        if (root) {
    870          root->assign(he->word);
    871          if (complexprefixes) {
    872            if (utf8)
    873              reverseword_utf(*root);
    874            else
    875              reverseword(*root);
    876          }
    877        }
    878        if (info)
    879          *info |= SPELL_COMPOUND;
    880      }
    881    }
    882  }
    883 
    884  return he;
    885 }
    886 
    887 std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
    888  bool capwords;
    889  size_t abbv;
    890  int captype;
    891  std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype);
    892  // word reversing wrapper for complex prefixes
    893  if (complexprefixes) {
    894    for (size_t j = 0; j < slst.size(); ++j) {
    895      if (utf8)
    896        reverseword_utf(slst[j]);
    897      else
    898        reverseword(slst[j]);
    899    }
    900  }
    901 
    902  // capitalize
    903  if (capwords)
    904    for (size_t j = 0; j < slst.size(); ++j) {
    905      mkinitcap(slst[j]);
    906    }
    907 
    908  // expand suggestions with dot(s)
    909  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
    910    for (size_t j = 0; j < slst.size(); ++j) {
    911      slst[j].append(word.substr(word.size() - abbv));
    912    }
    913  }
    914 
    915  // remove bad capitalized and forbidden forms
    916  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
    917    switch (captype) {
    918      case INITCAP:
    919      case ALLCAP: {
    920        size_t l = 0;
    921        for (size_t j = 0; j < slst.size(); ++j) {
    922          if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
    923            std::string s;
    924            std::vector<w_char> w;
    925            if (utf8) {
    926              u8_u16(w, slst[j]);
    927            } else {
    928              s = slst[j];
    929            }
    930            mkallsmall2(s, w);
    931            if (spell(s)) {
    932              slst[l] = s;
    933              ++l;
    934            } else {
    935              mkinitcap2(s, w);
    936              if (spell(s)) {
    937                slst[l] = s;
    938                ++l;
    939              }
    940            }
    941          } else {
    942            slst[l] = slst[j];
    943            ++l;
    944          }
    945        }
    946        slst.resize(l);
    947      }
    948    }
    949  }
    950 
    951  // remove duplications
    952  size_t l = 0;
    953  for (size_t j = 0; j < slst.size(); ++j) {
    954    slst[l] = slst[j];
    955    for (size_t k = 0; k < l; ++k) {
    956      if (slst[k] == slst[j]) {
    957        --l;
    958        break;
    959      }
    960    }
    961    ++l;
    962  }
    963  slst.resize(l);
    964 
    965  // output conversion
    966  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
    967  if (rl) {
    968    for (size_t i = 0; rl && i < slst.size(); ++i) {
    969      std::string wspace;
    970      if (rl->conv(slst[i], wspace)) {
    971        slst[i] = wspace;
    972      }
    973    }
    974  }
    975  return slst;
    976 }
    977 
    978 std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
    979        bool& capwords, size_t& abbv, int& captype) {
    980  captype = NOCAP;
    981  abbv = 0;
    982  capwords = false;
    983 
    984  std::vector<std::string> slst;
    985 
    986  int onlycmpdsug = 0;
    987  if (!pSMgr || m_HMgrs.empty())
    988    return slst;
    989 
    990  // process XML input of the simplified API (see manual)
    991  if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
    992    return spellml(word);
    993  }
    994  if (utf8) {
    995    if (word.size() >= MAXWORDUTF8LEN)
    996      return slst;
    997  } else {
    998    if (word.size() >= MAXWORDLEN)
    999      return slst;
   1000  }
   1001  size_t wl = 0;
   1002 
   1003  std::string scw;
   1004  std::vector<w_char> sunicw;
   1005 
   1006  // input conversion
   1007  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   1008  {
   1009    std::string wspace;
   1010 
   1011    bool convstatus = rl ? rl->conv(word, wspace) : false;
   1012    if (convstatus)
   1013      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
   1014    else
   1015      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
   1016 
   1017    if (wl == 0)
   1018      return slst;
   1019  }
   1020 
   1021  bool good = false;
   1022 
   1023  clock_t timelimit;
   1024  // initialize in every suggestion call
   1025  timelimit = clock();
   1026 
   1027  // check capitalized form for FORCEUCASE
   1028  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
   1029    int info = SPELL_ORIGCAP;
   1030    if (checkword(scw, &info, NULL)) {
   1031      std::string form(scw);
   1032      mkinitcap(form);
   1033      slst.push_back(form);
   1034      return slst;
   1035    }
   1036  }
   1037 
   1038  switch (captype) {
   1039    case NOCAP: {
   1040      good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
   1041      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1042          return slst;
   1043      if (abbv) {
   1044        std::string wspace(scw);
   1045        wspace.push_back('.');
   1046        good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1047        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1048            return slst;
   1049      }
   1050      break;
   1051    }
   1052 
   1053    case INITCAP: {
   1054      capwords = true;
   1055      good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
   1056      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1057          return slst;
   1058      std::string wspace(scw);
   1059      mkallsmall2(wspace, sunicw);
   1060      good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1061      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1062          return slst;
   1063      break;
   1064    }
   1065    case HUHINITCAP:
   1066      capwords = true;
   1067      /* FALLTHROUGH */
   1068    case HUHCAP: {
   1069      good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
   1070      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1071          return slst;
   1072      // something.The -> something. The
   1073      size_t dot_pos = scw.find('.');
   1074      if (dot_pos != std::string::npos) {
   1075        std::string postdot = scw.substr(dot_pos + 1);
   1076        int captype_;
   1077        if (utf8) {
   1078          std::vector<w_char> postdotu;
   1079          u8_u16(postdotu, postdot);
   1080          captype_ = get_captype_utf8(postdotu, langnum);
   1081        } else {
   1082          captype_ = get_captype(postdot, csconv);
   1083        }
   1084        if (captype_ == INITCAP) {
   1085          std::string str(scw);
   1086          str.insert(dot_pos + 1, 1, ' ');
   1087          insert_sug(slst, str);
   1088        }
   1089      }
   1090 
   1091      std::string wspace;
   1092 
   1093      if (captype == HUHINITCAP) {
   1094        // TheOpenOffice.org -> The OpenOffice.org
   1095        wspace = scw;
   1096        mkinitsmall2(wspace, sunicw);
   1097        good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1098        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1099            return slst;
   1100      }
   1101      wspace = scw;
   1102      mkallsmall2(wspace, sunicw);
   1103      if (spell(wspace.c_str()))
   1104        insert_sug(slst, wspace);
   1105      size_t prevns = slst.size();
   1106      good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1107      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1108          return slst;
   1109      if (captype == HUHINITCAP) {
   1110        mkinitcap2(wspace, sunicw);
   1111        if (spell(wspace.c_str()))
   1112          insert_sug(slst, wspace);
   1113        good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1114        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1115            return slst;
   1116      }
   1117      // aNew -> "a New" (instead of "a new")
   1118      for (size_t j = prevns; j < slst.size(); ++j) {
   1119        const char* space = strchr(slst[j].c_str(), ' ');
   1120        if (space) {
   1121          size_t slen = strlen(space + 1);
   1122          // different case after space (need capitalisation)
   1123          if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
   1124            std::string first(slst[j].c_str(), space + 1);
   1125            std::string second(space + 1);
   1126            std::vector<w_char> w;
   1127            if (utf8)
   1128              u8_u16(w, second);
   1129            mkinitcap2(second, w);
   1130            // set as first suggestion
   1131            slst.erase(slst.begin() + j);
   1132            slst.insert(slst.begin(), first + second);
   1133          }
   1134        }
   1135      }
   1136      break;
   1137    }
   1138 
   1139    case ALLCAP: {
   1140      std::string wspace(scw);
   1141      mkallsmall2(wspace, sunicw);
   1142      good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1143      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1144          return slst;
   1145      if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
   1146        insert_sug(slst, wspace);
   1147      mkinitcap2(wspace, sunicw);
   1148      good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
   1149      if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1150          return slst;
   1151      for (size_t j = 0; j < slst.size(); ++j) {
   1152        mkallcap(slst[j]);
   1153        if (pAMgr && pAMgr->get_checksharps()) {
   1154          if (utf8) {
   1155            mystrrep(slst[j], "\xC3\x9F", "SS");
   1156          } else {
   1157            mystrrep(slst[j], "\xDF", "SS");
   1158          }
   1159        }
   1160      }
   1161      break;
   1162    }
   1163  }
   1164 
   1165  // LANG_hu section: replace '-' with ' ' in Hungarian
   1166  if (langnum == LANG_hu) {
   1167    for (size_t j = 0; j < slst.size(); ++j) {
   1168      size_t pos = slst[j].find('-');
   1169      if (pos != std::string::npos) {
   1170        int info;
   1171        std::string w(slst[j].substr(0, pos));
   1172        w.append(slst[j].substr(pos + 1));
   1173        (void)spell(w, &info, NULL);
   1174        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
   1175          slst[j][pos] = ' ';
   1176        } else
   1177          slst[j][pos] = '-';
   1178      }
   1179    }
   1180  }
   1181  // END OF LANG_hu section
   1182  // try ngram approach since found nothing good suggestion
   1183  if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
   1184    switch (captype) {
   1185      case NOCAP: {
   1186        pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
   1187        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1188            return slst;
   1189        break;
   1190      }
   1191      /* FALLTHROUGH */
   1192      case HUHINITCAP:
   1193        capwords = true;
   1194      /* FALLTHROUGH */
   1195      case HUHCAP: {
   1196        std::string wspace(scw);
   1197        mkallsmall2(wspace, sunicw);
   1198        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
   1199        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1200            return slst;
   1201        break;
   1202      }
   1203      case INITCAP: {
   1204        capwords = true;
   1205        std::string wspace(scw);
   1206        mkallsmall2(wspace, sunicw);
   1207        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
   1208        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1209            return slst;
   1210        break;
   1211      }
   1212      case ALLCAP: {
   1213        std::string wspace(scw);
   1214        mkallsmall2(wspace, sunicw);
   1215        size_t oldns = slst.size();
   1216        pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
   1217        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1218            return slst;
   1219        for (size_t j = oldns; j < slst.size(); ++j) {
   1220          mkallcap(slst[j]);
   1221        }
   1222        break;
   1223      }
   1224    }
   1225  }
   1226 
   1227  // try dash suggestion (Afo-American -> Afro-American)
   1228  // Note: LibreOffice was modified to treat dashes as word
   1229  // characters to check "scot-free" etc. word forms, but
   1230  // we need to handle suggestions for "Afo-American", etc.,
   1231  // while "Afro-American" is missing from the dictionary.
   1232  // TODO avoid possible overgeneration
   1233  size_t dash_pos = scw.find('-');
   1234  if (dash_pos != std::string::npos) {
   1235    int nodashsug = 1;
   1236    for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
   1237      if (slst[j].find('-') != std::string::npos)
   1238        nodashsug = 0;
   1239    }
   1240 
   1241    size_t prev_pos = 0;
   1242    bool last = false;
   1243 
   1244    while (!good && nodashsug && !last) {
   1245      if (dash_pos == scw.size())
   1246        last = 1;
   1247      std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
   1248      if (!spell(chunk.c_str())) {
   1249        std::vector<std::string> nlst = suggest(chunk.c_str());
   1250        if (clock() > timelimit + TIMELIMIT_GLOBAL)
   1251            return slst;
   1252        for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
   1253          std::string wspace = scw.substr(0, prev_pos);
   1254          wspace.append(*j);
   1255          if (!last) {
   1256            wspace.append("-");
   1257            wspace.append(scw.substr(dash_pos + 1));
   1258          }
   1259          int info = 0;
   1260          if (pAMgr && pAMgr->get_forbiddenword())
   1261            checkword(wspace, &info, NULL);
   1262          if (!(info & SPELL_FORBIDDEN))
   1263            insert_sug(slst, wspace);
   1264        }
   1265        nodashsug = 0;
   1266      }
   1267      if (!last) {
   1268        prev_pos = dash_pos + 1;
   1269        dash_pos = scw.find('-', prev_pos);
   1270      }
   1271      if (dash_pos == std::string::npos)
   1272        dash_pos = scw.size();
   1273    }
   1274  }
   1275  return slst;
   1276 }
   1277 
   1278 const std::string& HunspellImpl::get_dict_encoding() const {
   1279  return encoding;
   1280 }
   1281 
   1282 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
   1283  std::vector<std::string> slst;
   1284 
   1285  std::string result2;
   1286  if (desc.empty())
   1287    return slst;
   1288  for (size_t i = 0; i < desc.size(); ++i) {
   1289 
   1290    std::string result;
   1291 
   1292    // add compound word parts (except the last one)
   1293    const char* s = desc[i].c_str();
   1294    const char* part = strstr(s, MORPH_PART);
   1295    if (part) {
   1296      const char* nextpart = strstr(part + 1, MORPH_PART);
   1297      while (nextpart) {
   1298        std::string field;
   1299        copy_field(field, part, MORPH_PART);
   1300        result.append(field);
   1301        part = nextpart;
   1302        nextpart = strstr(part + 1, MORPH_PART);
   1303      }
   1304      s = part;
   1305    }
   1306 
   1307    std::string tok(s);
   1308    size_t alt = 0;
   1309    while ((alt = tok.find(" | ", alt)) != std::string::npos) {
   1310      tok[alt + 1] = MSEP_ALT;
   1311    }
   1312    std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
   1313    for (size_t k = 0; k < pl.size(); ++k) {
   1314      // add derivational suffixes
   1315      if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
   1316        // remove inflectional suffixes
   1317        const size_t is = pl[k].find(MORPH_INFL_SFX);
   1318        if (is != std::string::npos)
   1319          pl[k].resize(is);
   1320        std::vector<std::string> singlepl;
   1321        singlepl.push_back(pl[k]);
   1322        std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
   1323        if (!sg.empty()) {
   1324          std::vector<std::string> gen = line_tok(sg, MSEP_REC);
   1325          for (size_t j = 0; j < gen.size(); ++j) {
   1326            result2.push_back(MSEP_REC);
   1327            result2.append(result);
   1328            result2.append(gen[j]);
   1329          }
   1330        }
   1331      } else {
   1332        result2.push_back(MSEP_REC);
   1333        result2.append(result);
   1334        if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
   1335          std::string field;
   1336          copy_field(field, pl[k], MORPH_SURF_PFX);
   1337          result2.append(field);
   1338        }
   1339        std::string field;
   1340        copy_field(field, pl[k], MORPH_STEM);
   1341        result2.append(field);
   1342      }
   1343    }
   1344  }
   1345  slst = line_tok(result2, MSEP_REC);
   1346  uniqlist(slst);
   1347  return slst;
   1348 }
   1349 
   1350 std::vector<std::string> HunspellImpl::stem(const std::string& word) {
   1351  return stem(analyze(word));
   1352 }
   1353 
   1354 const std::string& HunspellImpl::get_wordchars_cpp() const {
   1355  return pAMgr->get_wordchars();
   1356 }
   1357 
   1358 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
   1359  return pAMgr->get_wordchars_utf16();
   1360 }
   1361 
   1362 void HunspellImpl::mkinitcap(std::string& u8) {
   1363  if (utf8) {
   1364    std::vector<w_char> u16;
   1365    u8_u16(u16, u8);
   1366    ::mkinitcap_utf(u16, langnum);
   1367    u16_u8(u8, u16);
   1368  } else {
   1369    ::mkinitcap(u8, csconv);
   1370  }
   1371 }
   1372 
   1373 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
   1374  if (utf8) {
   1375    ::mkinitcap_utf(u16, langnum);
   1376    u16_u8(u8, u16);
   1377  } else {
   1378    ::mkinitcap(u8, csconv);
   1379  }
   1380  return u8.size();
   1381 }
   1382 
   1383 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
   1384  if (utf8) {
   1385    ::mkinitsmall_utf(u16, langnum);
   1386    u16_u8(u8, u16);
   1387  } else {
   1388    ::mkinitsmall(u8, csconv);
   1389  }
   1390  return u8.size();
   1391 }
   1392 
   1393 int HunspellImpl::add(const std::string& word) {
   1394  if (!m_HMgrs.empty())
   1395    return m_HMgrs[0]->add(word);
   1396  return 0;
   1397 }
   1398 
   1399 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
   1400  if (!m_HMgrs.empty())
   1401    return m_HMgrs[0]->add_with_affix(word, example);
   1402  return 0;
   1403 }
   1404 
   1405 int HunspellImpl::remove(const std::string& word) {
   1406  if (!m_HMgrs.empty())
   1407    return m_HMgrs[0]->remove(word);
   1408  return 0;
   1409 }
   1410 
   1411 const std::string& HunspellImpl::get_version_cpp() const {
   1412  return pAMgr->get_version();
   1413 }
   1414 
   1415 struct cs_info* HunspellImpl::get_csconv() {
   1416  return csconv;
   1417 }
   1418 
   1419 void HunspellImpl::cat_result(std::string& result, const std::string& st) {
   1420  if (!st.empty()) {
   1421    if (!result.empty())
   1422      result.append("\n");
   1423    result.append(st);
   1424  }
   1425 }
   1426 
   1427 std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
   1428  std::vector<std::string> slst = analyze_internal(word);
   1429  // output conversion
   1430  RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
   1431  if (rl) {
   1432    for (size_t i = 0; rl && i < slst.size(); ++i) {
   1433      std::string wspace;
   1434      if (rl->conv(slst[i], wspace)) {
   1435        slst[i] = wspace;
   1436      }
   1437    }
   1438  }
   1439  return slst;
   1440 }
   1441 
   1442 std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
   1443  std::vector<std::string> slst;
   1444  if (!pSMgr || m_HMgrs.empty())
   1445    return slst;
   1446  if (utf8) {
   1447    if (word.size() >= MAXWORDUTF8LEN)
   1448      return slst;
   1449  } else {
   1450    if (word.size() >= MAXWORDLEN)
   1451      return slst;
   1452  }
   1453  int captype = NOCAP;
   1454  size_t abbv = 0;
   1455  size_t wl = 0;
   1456 
   1457  std::string scw;
   1458  std::vector<w_char> sunicw;
   1459 
   1460  // input conversion
   1461  RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   1462  {
   1463    std::string wspace;
   1464 
   1465    bool convstatus = rl ? rl->conv(word, wspace) : false;
   1466    if (convstatus)
   1467      wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
   1468    else
   1469      wl = cleanword2(scw, sunicw, word, &captype, &abbv);
   1470  }
   1471 
   1472  if (wl == 0) {
   1473    if (abbv) {
   1474      scw.clear();
   1475      for (wl = 0; wl < abbv; wl++)
   1476        scw.push_back('.');
   1477      abbv = 0;
   1478    } else
   1479      return slst;
   1480  }
   1481 
   1482  std::string result;
   1483 
   1484  size_t n = 0;
   1485  // test numbers
   1486  // LANG_hu section: set dash information for suggestions
   1487  if (langnum == LANG_hu) {
   1488    size_t n2 = 0;
   1489    size_t n3 = 0;
   1490 
   1491    while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
   1492                        (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
   1493      n++;
   1494      if ((scw[n] == '.') || (scw[n] == ',')) {
   1495        if (((n2 == 0) && (n > 3)) ||
   1496            ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
   1497          break;
   1498        n2++;
   1499        n3 = n;
   1500      }
   1501    }
   1502 
   1503    if ((n == wl) && (n3 > 0) && (n - n3 > 3))
   1504      return slst;
   1505    if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
   1506                      checkword(scw.substr(n), NULL, NULL))) {
   1507      result.append(scw);
   1508      result.resize(n - 1);
   1509      if (n == wl)
   1510        cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
   1511      else {
   1512        std::string chunk = scw.substr(n - 1, 1);
   1513        cat_result(result, pSMgr->suggest_morph(chunk));
   1514        result.push_back('+');  // XXX SPEC. MORPHCODE
   1515        cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
   1516      }
   1517      return line_tok(result, MSEP_REC);
   1518    }
   1519  }
   1520  // END OF LANG_hu section
   1521 
   1522  switch (captype) {
   1523    case HUHCAP:
   1524    case HUHINITCAP:
   1525    case NOCAP: {
   1526      cat_result(result, pSMgr->suggest_morph(scw));
   1527      if (abbv) {
   1528        std::string u8buffer(scw);
   1529        u8buffer.push_back('.');
   1530        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1531      }
   1532      break;
   1533    }
   1534    case INITCAP: {
   1535      mkallsmall2(scw, sunicw);
   1536      std::string u8buffer(scw);
   1537      mkinitcap2(scw, sunicw);
   1538      cat_result(result, pSMgr->suggest_morph(u8buffer));
   1539      cat_result(result, pSMgr->suggest_morph(scw));
   1540      if (abbv) {
   1541        u8buffer.push_back('.');
   1542        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1543 
   1544        u8buffer = scw;
   1545        u8buffer.push_back('.');
   1546 
   1547        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1548      }
   1549      break;
   1550    }
   1551    case ALLCAP: {
   1552      cat_result(result, pSMgr->suggest_morph(scw));
   1553      if (abbv) {
   1554        std::string u8buffer(scw);
   1555        u8buffer.push_back('.');
   1556        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1557      }
   1558      mkallsmall2(scw, sunicw);
   1559      std::string u8buffer(scw);
   1560      mkinitcap2(scw, sunicw);
   1561 
   1562      cat_result(result, pSMgr->suggest_morph(u8buffer));
   1563      cat_result(result, pSMgr->suggest_morph(scw));
   1564      if (abbv) {
   1565        u8buffer.push_back('.');
   1566        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1567 
   1568        u8buffer = scw;
   1569        u8buffer.push_back('.');
   1570 
   1571        cat_result(result, pSMgr->suggest_morph(u8buffer));
   1572      }
   1573      break;
   1574    }
   1575  }
   1576 
   1577  if (!result.empty()) {
   1578    // word reversing wrapper for complex prefixes
   1579    if (complexprefixes) {
   1580      if (utf8)
   1581        reverseword_utf(result);
   1582      else
   1583        reverseword(result);
   1584    }
   1585    return line_tok(result, MSEP_REC);
   1586  }
   1587 
   1588  // compound word with dash (HU) I18n
   1589  // LANG_hu section: set dash information for suggestions
   1590 
   1591  size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
   1592  if (dash_pos != std::string::npos) {
   1593    int nresult = 0;
   1594 
   1595    std::string part1 = scw.substr(0, dash_pos);
   1596    std::string part2 = scw.substr(dash_pos+1);
   1597 
   1598    // examine 2 sides of the dash
   1599    if (part2.empty()) {  // base word ending with dash
   1600      if (spell(part1)) {
   1601        std::string p = pSMgr->suggest_morph(part1);
   1602        if (!p.empty()) {
   1603          slst = line_tok(p, MSEP_REC);
   1604          return slst;
   1605        }
   1606      }
   1607    } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
   1608      if (spell(part1) && (spell("-e"))) {
   1609        std::string st = pSMgr->suggest_morph(part1);
   1610        if (!st.empty()) {
   1611          result.append(st);
   1612        }
   1613        result.push_back('+');  // XXX spec. separator in MORPHCODE
   1614        st = pSMgr->suggest_morph("-e");
   1615        if (!st.empty()) {
   1616          result.append(st);
   1617        }
   1618        return line_tok(result, MSEP_REC);
   1619      }
   1620    } else {
   1621      // first word ending with dash: word- XXX ???
   1622      part1.push_back(' ');
   1623      nresult = spell(part1);
   1624      part1.erase(part1.size() - 1);
   1625      if (nresult && spell(part2) &&
   1626          ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
   1627        std::string st = pSMgr->suggest_morph(part1);
   1628        if (!st.empty()) {
   1629          result.append(st);
   1630          result.push_back('+');  // XXX spec. separator in MORPHCODE
   1631        }
   1632        st = pSMgr->suggest_morph(part2);
   1633        if (!st.empty()) {
   1634          result.append(st);
   1635        }
   1636        return line_tok(result, MSEP_REC);
   1637      }
   1638    }
   1639    // affixed number in correct word
   1640    if (nresult && (dash_pos > 0) &&
   1641        (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
   1642         (scw[dash_pos - 1] == '.'))) {
   1643      n = 1;
   1644      if (scw[dash_pos - n] == '.')
   1645        n++;
   1646      // search first not a number character to left from dash
   1647      while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
   1648             (n < 6)) {
   1649        n++;
   1650      }
   1651      if (dash_pos < n)
   1652        n--;
   1653      // numbers: valami1000000-hoz
   1654      // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
   1655      // 56-hoz, 6-hoz
   1656      for (; n >= 1; n--) {
   1657        if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
   1658            continue;
   1659        }
   1660        std::string chunk = scw.substr(dash_pos - n);
   1661        if (checkword(chunk, NULL, NULL)) {
   1662          result.append(chunk);
   1663          std::string st = pSMgr->suggest_morph(chunk);
   1664          if (!st.empty()) {
   1665            result.append(st);
   1666          }
   1667          return line_tok(result, MSEP_REC);
   1668        }
   1669      }
   1670    }
   1671  }
   1672  return slst;
   1673 }
   1674 
   1675 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
   1676  std::vector<std::string> slst;
   1677  if (!pSMgr || pl.empty())
   1678    return slst;
   1679  std::vector<std::string> pl2 = analyze(word);
   1680  int captype = NOCAP;
   1681  int abbv = 0;
   1682  std::string cw;
   1683  cleanword(cw, word, &captype, &abbv);
   1684  std::string result;
   1685 
   1686  for (size_t i = 0; i < pl.size(); ++i) {
   1687    cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
   1688  }
   1689 
   1690  if (!result.empty()) {
   1691    // allcap
   1692    if (captype == ALLCAP)
   1693      mkallcap(result);
   1694 
   1695    // line split
   1696    slst = line_tok(result, MSEP_REC);
   1697 
   1698    // capitalize
   1699    if (captype == INITCAP || captype == HUHINITCAP) {
   1700      for (size_t j = 0; j < slst.size(); ++j) {
   1701        mkinitcap(slst[j]);
   1702      }
   1703    }
   1704 
   1705    // temporary filtering of prefix related errors (eg.
   1706    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
   1707    std::vector<std::string>::iterator it = slst.begin();
   1708    while (it != slst.end()) {
   1709      if (!spell(*it)) {
   1710        it = slst.erase(it);
   1711      } else  {
   1712        ++it;
   1713      }
   1714    }
   1715  }
   1716  return slst;
   1717 }
   1718 
   1719 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
   1720  std::vector<std::string> pl = analyze(pattern);
   1721  std::vector<std::string> slst = generate(word, pl);
   1722  uniqlist(slst);
   1723  return slst;
   1724 }
   1725 
   1726 // minimal XML parser functions
   1727 std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
   1728  std::string dest;
   1729  if (pos == std::string::npos)
   1730    return dest;
   1731  const char* par = in_par.c_str() + pos;
   1732  char end = *par;
   1733  if (end == '>')
   1734    end = '<';
   1735  else if (end != '\'' && end != '"')
   1736    return dest;  // bad XML
   1737  for (par++; *par != '\0' && *par != end; ++par) {
   1738    dest.push_back(*par);
   1739  }
   1740  mystrrep(dest, "&lt;", "<");
   1741  mystrrep(dest, "&amp;", "&");
   1742  return dest;
   1743 }
   1744 
   1745 int HunspellImpl::get_langnum() const {
   1746  return langnum;
   1747 }
   1748 
   1749 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
   1750  RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
   1751  if (rl) {
   1752    return rl->conv(word, dest);
   1753  }
   1754  dest.assign(word);
   1755  return false;
   1756 }
   1757 
   1758 // return the beginning of the element (attr == NULL) or the attribute
   1759 std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
   1760  if (pos == std::string::npos)
   1761    return std::string::npos;
   1762 
   1763  std::string::size_type endpos = s.find('>', pos);
   1764  if (attr == NULL)
   1765    return endpos;
   1766  while (true) {
   1767    pos = s.find(attr, pos);
   1768    if (pos == std::string::npos || pos >= endpos)
   1769      return std::string::npos;
   1770    if (s[pos - 1] == ' ' || s[pos - 1] == '\n')
   1771      break;
   1772    pos += strlen(attr);
   1773  }
   1774  return pos + strlen(attr);
   1775 }
   1776 
   1777 int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
   1778                                const char* attr,
   1779                                const char* value) {
   1780  std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
   1781  if (cw == value)
   1782    return 1;
   1783  return 0;
   1784 }
   1785 
   1786 std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
   1787  std::vector<std::string> slst;
   1788  if (pos == std::string::npos)
   1789    return slst;
   1790  while (true) {
   1791    pos = list.find(tag, pos);
   1792    if (pos == std::string::npos)
   1793        break;
   1794    std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
   1795    if (cw.empty()) {
   1796      break;
   1797    }
   1798    slst.push_back(cw);
   1799    ++pos;
   1800  }
   1801  return slst;
   1802 }
   1803 
   1804 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
   1805  std::vector<std::string> slst;
   1806 
   1807  std::string::size_type qpos = in_word.find("<query");
   1808  if (qpos == std::string::npos)
   1809    return slst;  // bad XML input
   1810 
   1811  std::string::size_type q2pos = in_word.find('>', qpos);
   1812  if (q2pos == std::string::npos)
   1813    return slst;  // bad XML input
   1814 
   1815  q2pos = in_word.find("<word", q2pos);
   1816  if (q2pos == std::string::npos)
   1817    return slst;  // bad XML input
   1818 
   1819  if (check_xml_par(in_word, qpos, "type=", "analyze")) {
   1820    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
   1821    if (!cw.empty())
   1822      slst = analyze(cw);
   1823    if (slst.empty())
   1824      return slst;
   1825    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
   1826    std::string r;
   1827    r.append("<code>");
   1828    for (size_t i = 0; i < slst.size(); ++i) {
   1829      r.append("<a>");
   1830 
   1831      std::string entry(slst[i]);
   1832      mystrrep(entry, "\t", " ");
   1833      mystrrep(entry, "&", "&amp;");
   1834      mystrrep(entry, "<", "&lt;");
   1835      r.append(entry);
   1836 
   1837      r.append("</a>");
   1838    }
   1839    r.append("</code>");
   1840    slst.clear();
   1841    slst.push_back(r);
   1842    return slst;
   1843  } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
   1844    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
   1845    if (!cw.empty())
   1846      return stem(cw);
   1847  } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
   1848    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
   1849    if (cw.empty())
   1850      return slst;
   1851    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
   1852    if (q3pos != std::string::npos) {
   1853      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
   1854      if (!cw2.empty()) {
   1855        return generate(cw, cw2);
   1856      }
   1857    } else {
   1858      q2pos = in_word.find("<code", q2pos + 1);
   1859      if (q2pos != std::string::npos) {
   1860        std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
   1861        if (!slst2.empty()) {
   1862          slst = generate(cw, slst2);
   1863          uniqlist(slst);
   1864          return slst;
   1865        }
   1866      }
   1867    }
   1868  } else if (check_xml_par(in_word, qpos, "type=", "add")) {
   1869    std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
   1870    if (cw.empty())
   1871      return slst;
   1872    std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
   1873    if (q3pos != std::string::npos) {
   1874      std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
   1875      if (!cw2.empty()) {
   1876        add_with_affix(cw, cw2);
   1877      } else {
   1878        add(cw);
   1879      }
   1880    } else {
   1881        add(cw);
   1882    }
   1883  }
   1884  return slst;
   1885 }
   1886 
   1887 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
   1888  std::vector<std::string> slst;
   1889  struct hentry* he = NULL;
   1890  int len;
   1891  std::string w2;
   1892  const char* word;
   1893  const char* ignoredchars = pAMgr->get_ignore();
   1894  if (ignoredchars != NULL) {
   1895    w2.assign(root_word);
   1896    if (utf8) {
   1897      const std::vector<w_char>& ignoredchars_utf16 =
   1898          pAMgr->get_ignore_utf16();
   1899      remove_ignored_chars_utf(w2, ignoredchars_utf16);
   1900    } else {
   1901      remove_ignored_chars(w2, ignoredchars);
   1902    }
   1903    word = w2.c_str();
   1904  } else
   1905    word = root_word.c_str();
   1906 
   1907  len = strlen(word);
   1908 
   1909  if (!len)
   1910    return slst;
   1911 
   1912  for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
   1913    he = m_HMgrs[i]->lookup(word);
   1914  }
   1915  if (he) {
   1916    slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
   1917  }
   1918  return slst;
   1919 }
   1920 
   1921 namespace {
   1922  int munge_vector(char*** slst, const std::vector<std::string>& items) {
   1923    if (items.empty()) {
   1924      *slst = NULL;
   1925      return 0;
   1926    } else {
   1927      *slst = (char**)malloc(sizeof(char*) * items.size());
   1928      if (!*slst)
   1929        return 0;
   1930      for (size_t i = 0; i < items.size(); ++i)
   1931        (*slst)[i] = mystrdup(items[i].c_str());
   1932    }
   1933    return items.size();
   1934  }
   1935 }
   1936 
   1937 int HunspellImpl::spell(const char* word, int* info, char** root) {
   1938  std::string sroot;
   1939  bool ret = spell(word, info, root ? &sroot : NULL);
   1940  if (root) {
   1941    if (sroot.empty()) {
   1942      *root = NULL;
   1943    } else {
   1944      *root = mystrdup(sroot.c_str());
   1945    }
   1946  }
   1947  return ret;
   1948 }
   1949 
   1950 int HunspellImpl::suggest(char*** slst, const char* word) {
   1951  std::vector<std::string> suggests = suggest(word);
   1952  return munge_vector(slst, suggests);
   1953 }
   1954 
   1955 int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
   1956  std::vector<std::string> stems = suffix_suggest(root_word);
   1957  return munge_vector(slst, stems);
   1958 }
   1959 
   1960 void HunspellImpl::free_list(char*** slst, int n) {
   1961  if (slst && *slst) {
   1962    for (int i = 0; i < n; i++)
   1963      free((*slst)[i]);
   1964    free(*slst);
   1965    *slst = NULL;
   1966  }
   1967 }
   1968 
   1969 char* HunspellImpl::get_dic_encoding() {
   1970  return &encoding[0];
   1971 }
   1972 
   1973 int HunspellImpl::analyze(char*** slst, const char* word) {
   1974  std::vector<std::string> stems = analyze(word);
   1975  return munge_vector(slst, stems);
   1976 }
   1977 
   1978 int HunspellImpl::stem(char*** slst, const char* word) {
   1979  std::vector<std::string> stems = stem(word);
   1980  return munge_vector(slst, stems);
   1981 }
   1982 
   1983 int HunspellImpl::stem(char*** slst, char** desc, int n) {
   1984  std::vector<std::string> morph;
   1985  morph.reserve(n);
   1986  for (int i = 0; i < n; ++i)
   1987    morph.push_back(desc[i]);
   1988 
   1989  std::vector<std::string> stems = stem(morph);
   1990  return munge_vector(slst, stems);
   1991 }
   1992 
   1993 int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
   1994  std::vector<std::string> stems = generate(word, pattern);
   1995  return munge_vector(slst, stems);
   1996 }
   1997 
   1998 int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
   1999  std::vector<std::string> morph;
   2000  morph.reserve(pln);
   2001  for (int i = 0; i < pln; ++i)
   2002    morph.push_back(pl[i]);
   2003 
   2004  std::vector<std::string> stems = generate(word, morph);
   2005  return munge_vector(slst, stems);
   2006 }
   2007 
   2008 const char* HunspellImpl::get_wordchars() const {
   2009  return get_wordchars_cpp().c_str();
   2010 }
   2011 
   2012 const char* HunspellImpl::get_version() const {
   2013  return get_version_cpp().c_str();
   2014 }
   2015 
   2016 int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
   2017  std::string d;
   2018  bool ret = input_conv(word, d);
   2019  if (ret && d.size() < destsize) {
   2020    strncpy(dest, d.c_str(), destsize);
   2021    return 1;
   2022  }
   2023  return 0;
   2024 }
   2025 
   2026 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
   2027  : m_Impl(new HunspellImpl(affpath, dpath, key)) {
   2028 }
   2029 
   2030 Hunspell::~Hunspell() {
   2031  delete m_Impl;
   2032 }
   2033 
   2034 // load extra dictionaries
   2035 int Hunspell::add_dic(const char* dpath, const char* key) {
   2036  return m_Impl->add_dic(dpath, key);
   2037 }
   2038 
   2039 bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
   2040  return m_Impl->spell(word, info, root);
   2041 }
   2042 
   2043 std::vector<std::string> Hunspell::suggest(const std::string& word) {
   2044  return m_Impl->suggest(word);
   2045 }
   2046 
   2047 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
   2048  return m_Impl->suffix_suggest(root_word);
   2049 }
   2050 
   2051 const std::string& Hunspell::get_dict_encoding() const {
   2052  return m_Impl->get_dict_encoding();
   2053 }
   2054 
   2055 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
   2056  return m_Impl->stem(desc);
   2057 }
   2058 
   2059 std::vector<std::string> Hunspell::stem(const std::string& word) {
   2060  return m_Impl->stem(word);
   2061 }
   2062 
   2063 const std::string& Hunspell::get_wordchars_cpp() const {
   2064  return m_Impl->get_wordchars_cpp();
   2065 }
   2066 
   2067 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
   2068  return m_Impl->get_wordchars_utf16();
   2069 }
   2070 
   2071 int Hunspell::add(const std::string& word) {
   2072  return m_Impl->add(word);
   2073 }
   2074 
   2075 int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
   2076  return m_Impl->add_with_affix(word, example);
   2077 }
   2078 
   2079 int Hunspell::remove(const std::string& word) {
   2080  return m_Impl->remove(word);
   2081 }
   2082 
   2083 const std::string& Hunspell::get_version_cpp() const {
   2084  return m_Impl->get_version_cpp();
   2085 }
   2086 
   2087 struct cs_info* Hunspell::get_csconv() {
   2088  return m_Impl->get_csconv();
   2089 }
   2090 
   2091 std::vector<std::string> Hunspell::analyze(const std::string& word) {
   2092  return m_Impl->analyze(word);
   2093 }
   2094 
   2095 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
   2096  return m_Impl->generate(word, pl);
   2097 }
   2098 
   2099 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
   2100  return m_Impl->generate(word, pattern);
   2101 }
   2102 
   2103 int Hunspell::get_langnum() const {
   2104  return m_Impl->get_langnum();
   2105 }
   2106 
   2107 bool Hunspell::input_conv(const std::string& word, std::string& dest) {
   2108  return m_Impl->input_conv(word, dest);
   2109 }
   2110 
   2111 int Hunspell::spell(const char* word, int* info, char** root) {
   2112  return m_Impl->spell(word, info, root);
   2113 }
   2114 
   2115 int Hunspell::suggest(char*** slst, const char* word) {
   2116  return m_Impl->suggest(slst, word);
   2117 }
   2118 
   2119 int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
   2120  return m_Impl->suffix_suggest(slst, root_word);
   2121 }
   2122 
   2123 void Hunspell::free_list(char*** slst, int n) {
   2124  m_Impl->free_list(slst, n);
   2125 }
   2126 
   2127 char* Hunspell::get_dic_encoding() {
   2128  return m_Impl->get_dic_encoding();
   2129 }
   2130 
   2131 int Hunspell::analyze(char*** slst, const char* word) {
   2132  return m_Impl->analyze(slst, word);
   2133 }
   2134 
   2135 int Hunspell::stem(char*** slst, const char* word) {
   2136  return m_Impl->stem(slst, word);
   2137 }
   2138 
   2139 int Hunspell::stem(char*** slst, char** desc, int n) {
   2140  return m_Impl->stem(slst, desc, n);
   2141 }
   2142 
   2143 int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
   2144  return m_Impl->generate(slst, word, pattern);
   2145 }
   2146 
   2147 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
   2148  return m_Impl->generate(slst, word, pl, pln);
   2149 }
   2150 
   2151 const char* Hunspell::get_wordchars() const {
   2152  return m_Impl->get_wordchars();
   2153 }
   2154 
   2155 const char* Hunspell::get_version() const {
   2156  return m_Impl->get_version();
   2157 }
   2158 
   2159 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
   2160  return m_Impl->input_conv(word, dest, destsize);
   2161 }
   2162 
   2163 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
   2164  return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath));
   2165 }
   2166 
   2167 Hunhandle* Hunspell_create_key(const char* affpath,
   2168                               const char* dpath,
   2169                               const char* key) {
   2170  return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key));
   2171 }
   2172 
   2173 void Hunspell_destroy(Hunhandle* pHunspell) {
   2174  delete reinterpret_cast<HunspellImpl*>(pHunspell);
   2175 }
   2176 
   2177 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
   2178  return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath);
   2179 }
   2180 
   2181 int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
   2182  return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word);
   2183 }
   2184 
   2185 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
   2186  return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding();
   2187 }
   2188 
   2189 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
   2190  return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word);
   2191 }
   2192 
   2193 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
   2194  return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word);
   2195 }
   2196 
   2197 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
   2198  return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word);
   2199 }
   2200 
   2201 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
   2202  return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n);
   2203 }
   2204 
   2205 int Hunspell_generate(Hunhandle* pHunspell,
   2206                      char*** slst,
   2207                      const char* word,
   2208                      const char* pattern)
   2209 {
   2210  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
   2211 }
   2212 
   2213 int Hunspell_generate2(Hunhandle* pHunspell,
   2214                       char*** slst,
   2215                       const char* word,
   2216                       char** desc,
   2217                       int n)
   2218 {
   2219  return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
   2220 }
   2221 
   2222 /* functions for run-time modification of the dictionary */
   2223 
   2224 /* add word to the run-time dictionary */
   2225 
   2226 int Hunspell_add(Hunhandle* pHunspell, const char* word) {
   2227  return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word);
   2228 }
   2229 
   2230 /* add word to the run-time dictionary with affix flags of
   2231 * the example (a dictionary word): Hunspell will recognize
   2232 * affixed forms of the new word, too.
   2233 */
   2234 
   2235 int Hunspell_add_with_affix(Hunhandle* pHunspell,
   2236                            const char* word,
   2237                            const char* example) {
   2238  return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
   2239 }
   2240 
   2241 /* remove word from the run-time dictionary */
   2242 
   2243 int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
   2244  return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word);
   2245 }
   2246 
   2247 void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
   2248  reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n);
   2249 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE