hunspell.cxx (66656B)
1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * Copyright (C) 2002-2022 Németh László 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 1.1 (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 17 * 18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, 19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, 20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, 21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, 22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen 23 * 24 * Alternatively, the contents of this file may be used under the terms of 25 * either the GNU General Public License Version 2 or later (the "GPL"), or 26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 * in which case the provisions of the GPL or the LGPL are applicable instead 28 * of those above. If you wish to allow use of your version of this file only 29 * under the terms of either the GPL or the LGPL, and not to allow others to 30 * use your version of this file under the terms of the MPL, indicate your 31 * decision by deleting the provisions above and replace them with the notice 32 * and other provisions required by the GPL or the LGPL. If you do not delete 33 * the provisions above, a recipient may use your version of this file under 34 * the terms of any one of the MPL, the GPL or the LGPL. 35 * 36 * ***** END LICENSE BLOCK ***** */ 37 /* 38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 39 * And Contributors. All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 52 * 3. All modifications to the source code must be clearly marked as 53 * such. Binary redistributions based on modified source code 54 * must be clearly marked as modified versions in the documentation 55 * and/or other materials provided with the distribution. 56 * 57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include <stdlib.h> 72 #include <string.h> 73 #include <stdio.h> 74 #include <time.h> 75 76 #include "affixmgr.hxx" 77 #include "hunspell.hxx" 78 #include "suggestmgr.hxx" 79 #include "hunspell.h" 80 #include "csutil.hxx" 81 82 #include <limits> 83 #include <string> 84 85 #define MAXWORDUTF8LEN (MAXWORDLEN * 3) 86 87 class HunspellImpl 88 { 89 public: 90 HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL); 91 ~HunspellImpl(); 92 int add_dic(const char* dpath, const char* key = NULL); 93 std::vector<std::string> suffix_suggest(const std::string& root_word); 94 std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl); 95 std::vector<std::string> generate(const std::string& word, const std::string& pattern); 96 std::vector<std::string> stem(const std::string& word); 97 std::vector<std::string> stem(const std::vector<std::string>& morph); 98 std::vector<std::string> analyze(const std::string& word); 99 int get_langnum() const; 100 bool input_conv(const std::string& word, std::string& dest); 101 bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); 102 std::vector<std::string> suggest(const std::string& word); 103 const std::string& get_wordchars_cpp() const; 104 const std::vector<w_char>& get_wordchars_utf16() const; 105 const std::string& get_dict_encoding() const; 106 int add(const std::string& word); 107 int add_with_affix(const std::string& word, const std::string& example); 108 int remove(const std::string& word); 109 const std::string& get_version_cpp() const; 110 struct cs_info* get_csconv(); 111 112 int spell(const char* word, int* info = NULL, char** root = NULL); 113 int suggest(char*** slst, const char* word); 114 int suffix_suggest(char*** slst, const char* root_word); 115 void free_list(char*** slst, int n); 116 char* get_dic_encoding(); 117 int analyze(char*** slst, const char* word); 118 int stem(char*** slst, const char* word); 119 int stem(char*** slst, char** morph, int n); 120 int generate(char*** slst, const char* word, const char* word2); 121 int generate(char*** slst, const char* word, char** desc, int n); 122 const char* get_wordchars() const; 123 const char* get_version() const; 124 int input_conv(const char* word, char* dest, size_t destsize); 125 126 private: 127 AffixMgr* pAMgr; 128 std::vector<HashMgr*> m_HMgrs; 129 SuggestMgr* pSMgr; 130 char* affixpath; 131 std::string encoding; 132 struct cs_info* csconv; 133 int langnum; 134 int utf8; 135 int complexprefixes; 136 std::vector<std::string> wordbreak; 137 138 private: 139 std::vector<std::string> analyze_internal(const std::string& word); 140 bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); 141 std::vector<std::string> suggest_internal(const std::string& word, 142 bool& capitalized, size_t& abbreviated, int& captype); 143 void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev); 144 size_t cleanword2(std::string& dest, 145 std::vector<w_char>& dest_u, 146 const std::string& src, 147 int* pcaptype, 148 size_t* pabbrev); 149 void clean_ignore(std::string& dest, const std::string& src); 150 void mkinitcap(std::string& u8); 151 int mkinitcap2(std::string& u8, std::vector<w_char>& u16); 152 int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); 153 void mkallcap(std::string& u8); 154 int mkallsmall2(std::string& u8, std::vector<w_char>& u16); 155 struct hentry* checkword(const std::string& source, int* info, std::string* root); 156 std::string sharps_u8_l1(const std::string& source); 157 hentry* 158 spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root); 159 int is_keepcase(const hentry* rv); 160 void insert_sug(std::vector<std::string>& slst, const std::string& word); 161 void cat_result(std::string& result, const std::string& st); 162 std::vector<std::string> spellml(const std::string& word); 163 std::string get_xml_par(const std::string& par, std::string::size_type pos); 164 std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr); 165 std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag); 166 int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value); 167 private: 168 HunspellImpl(const HunspellImpl&); 169 HunspellImpl& operator=(const HunspellImpl&); 170 }; 171 172 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) { 173 csconv = NULL; 174 utf8 = 0; 175 complexprefixes = 0; 176 affixpath = mystrdup(affpath); 177 178 /* first set up the hash manager */ 179 m_HMgrs.push_back(new HashMgr(dpath, affpath, key)); 180 181 /* next set up the affix manager */ 182 /* it needs access to the hash manager lookup methods */ 183 pAMgr = new AffixMgr(affpath, m_HMgrs, key); 184 185 /* get the preferred try string and the dictionary */ 186 /* encoding from the Affix Manager for that dictionary */ 187 char* try_string = pAMgr->get_try_string(); 188 encoding = pAMgr->get_encoding(); 189 langnum = pAMgr->get_langnum(); 190 utf8 = pAMgr->get_utf8(); 191 if (!utf8) 192 csconv = get_current_cs(encoding); 193 complexprefixes = pAMgr->get_complexprefixes(); 194 wordbreak = pAMgr->get_breaktable(); 195 196 /* and finally set up the suggestion manager */ 197 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); 198 if (try_string) 199 free(try_string); 200 } 201 202 HunspellImpl::~HunspellImpl() { 203 delete pSMgr; 204 delete pAMgr; 205 for (size_t i = 0; i < m_HMgrs.size(); ++i) 206 delete m_HMgrs[i]; 207 pSMgr = NULL; 208 pAMgr = NULL; 209 #ifdef MOZILLA_CLIENT 210 delete[] csconv; 211 #endif 212 csconv = NULL; 213 if (affixpath) 214 free(affixpath); 215 affixpath = NULL; 216 } 217 218 // load extra dictionaries 219 int HunspellImpl::add_dic(const char* dpath, const char* key) { 220 if (!affixpath) 221 return 1; 222 m_HMgrs.push_back(new HashMgr(dpath, affixpath, key)); 223 return 0; 224 } 225 226 227 // make a copy of src at dest while removing all characters 228 // specified in IGNORE rule 229 void HunspellImpl::clean_ignore(std::string& dest, 230 const std::string& src) { 231 dest.clear(); 232 dest.assign(src); 233 const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; 234 if (ignoredchars != NULL) { 235 if (utf8) { 236 const std::vector<w_char>& ignoredchars_utf16 = 237 pAMgr->get_ignore_utf16(); 238 remove_ignored_chars_utf(dest, ignoredchars_utf16); 239 } else { 240 remove_ignored_chars(dest, ignoredchars); 241 } 242 } 243 } 244 245 246 // make a copy of src at destination while removing all leading 247 // blanks and removing any trailing periods after recording 248 // their presence with the abbreviation flag 249 // also since already going through character by character, 250 // set the capitalization type 251 // return the length of the "cleaned" (and UTF-8 encoded) word 252 253 size_t HunspellImpl::cleanword2(std::string& dest, 254 std::vector<w_char>& dest_utf, 255 const std::string& src, 256 int* pcaptype, 257 size_t* pabbrev) { 258 dest.clear(); 259 dest_utf.clear(); 260 261 // remove IGNORE characters from the string 262 std::string w2; 263 clean_ignore(w2, src); 264 265 const char* q = w2.c_str(); 266 267 // first skip over any leading blanks 268 while (*q == ' ') 269 ++q; 270 271 // now strip off any trailing periods (recording their presence) 272 *pabbrev = 0; 273 int nl = strlen(q); 274 while ((nl > 0) && (*(q + nl - 1) == '.')) { 275 nl--; 276 (*pabbrev)++; 277 } 278 279 // if no characters are left it can't be capitalized 280 if (nl <= 0) { 281 *pcaptype = NOCAP; 282 return 0; 283 } 284 285 dest.append(q, nl); 286 nl = dest.size(); 287 if (utf8) { 288 u8_u16(dest_utf, dest); 289 *pcaptype = get_captype_utf8(dest_utf, langnum); 290 } else { 291 *pcaptype = get_captype(dest, csconv); 292 } 293 return nl; 294 } 295 296 void HunspellImpl::cleanword(std::string& dest, 297 const std::string& src, 298 int* pcaptype, 299 int* pabbrev) { 300 dest.clear(); 301 const unsigned char* q = (const unsigned char*)src.c_str(); 302 int firstcap = 0; 303 304 // first skip over any leading blanks 305 while (*q == ' ') 306 ++q; 307 308 // now strip off any trailing periods (recording their presence) 309 *pabbrev = 0; 310 int nl = strlen((const char*)q); 311 while ((nl > 0) && (*(q + nl - 1) == '.')) { 312 nl--; 313 (*pabbrev)++; 314 } 315 316 // if no characters are left it can't be capitalized 317 if (nl <= 0) { 318 *pcaptype = NOCAP; 319 return; 320 } 321 322 // now determine the capitalization type of the first nl letters 323 int ncap = 0; 324 int nneutral = 0; 325 int nc = 0; 326 327 if (!utf8) { 328 while (nl > 0) { 329 nc++; 330 if (csconv[(*q)].ccase) 331 ncap++; 332 if (csconv[(*q)].cupper == csconv[(*q)].clower) 333 nneutral++; 334 dest.push_back(*q++); 335 nl--; 336 } 337 // remember to terminate the destination string 338 firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase; 339 } else { 340 std::vector<w_char> t; 341 u8_u16(t, src); 342 for (size_t i = 0; i < t.size(); ++i) { 343 unsigned short idx = (t[i].h << 8) + t[i].l; 344 unsigned short low = unicodetolower(idx, langnum); 345 if (idx != low) 346 ncap++; 347 if (unicodetoupper(idx, langnum) == low) 348 nneutral++; 349 } 350 u16_u8(dest, t); 351 if (ncap) { 352 unsigned short idx = (t[0].h << 8) + t[0].l; 353 firstcap = (idx != unicodetolower(idx, langnum)); 354 } 355 } 356 357 // now finally set the captype 358 if (ncap == 0) { 359 *pcaptype = NOCAP; 360 } else if ((ncap == 1) && firstcap) { 361 *pcaptype = INITCAP; 362 } else if ((ncap == nc) || ((ncap + nneutral) == nc)) { 363 *pcaptype = ALLCAP; 364 } else if ((ncap > 1) && firstcap) { 365 *pcaptype = HUHINITCAP; 366 } else { 367 *pcaptype = HUHCAP; 368 } 369 } 370 371 void HunspellImpl::mkallcap(std::string& u8) { 372 if (utf8) { 373 std::vector<w_char> u16; 374 u8_u16(u16, u8); 375 ::mkallcap_utf(u16, langnum); 376 u16_u8(u8, u16); 377 } else { 378 ::mkallcap(u8, csconv); 379 } 380 } 381 382 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) { 383 if (utf8) { 384 ::mkallsmall_utf(u16, langnum); 385 u16_u8(u8, u16); 386 } else { 387 ::mkallsmall(u8, csconv); 388 } 389 return u8.size(); 390 } 391 392 // convert UTF-8 sharp S codes to latin 1 393 std::string HunspellImpl::sharps_u8_l1(const std::string& source) { 394 std::string dest(source); 395 mystrrep(dest, "\xC3\x9F", "\xDF"); 396 return dest; 397 } 398 399 // recursive search for right ss - sharp s permutations 400 hentry* HunspellImpl::spellsharps(std::string& base, 401 size_t n_pos, 402 int n, 403 int repnum, 404 int* info, 405 std::string* root) { 406 size_t pos = base.find("ss", n_pos); 407 if (pos != std::string::npos && (n < MAXSHARPS)) { 408 base[pos] = '\xC3'; 409 base[pos + 1] = '\x9F'; 410 hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root); 411 if (h) 412 return h; 413 base[pos] = 's'; 414 base[pos + 1] = 's'; 415 h = spellsharps(base, pos + 2, n + 1, repnum, info, root); 416 if (h) 417 return h; 418 } else if (repnum > 0) { 419 if (utf8) 420 return checkword(base, info, root); 421 std::string tmp(sharps_u8_l1(base)); 422 return checkword(tmp, info, root); 423 } 424 return NULL; 425 } 426 427 int HunspellImpl::is_keepcase(const hentry* rv) { 428 return pAMgr && rv->astr && pAMgr->get_keepcase() && 429 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen); 430 } 431 432 /* insert a word to the beginning of the suggestion array */ 433 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) { 434 slst.insert(slst.begin(), word); 435 } 436 437 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { 438 bool r = spell_internal(word, info, root); 439 if (r && root) { 440 // output conversion 441 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; 442 if (rl) { 443 std::string wspace; 444 if (rl->conv(*root, wspace)) { 445 *root = wspace; 446 } 447 } 448 } 449 return r; 450 } 451 452 bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { 453 struct hentry* rv = NULL; 454 455 int info2 = 0; 456 if (!info) 457 info = &info2; 458 else 459 *info = 0; 460 461 // Hunspell supports XML input of the simplified API (see manual) 462 if (word == SPELL_XML) 463 return true; 464 if (utf8) { 465 if (word.size() >= MAXWORDUTF8LEN) 466 return false; 467 } else { 468 if (word.size() >= MAXWORDLEN) 469 return false; 470 } 471 int captype = NOCAP; 472 size_t abbv = 0; 473 size_t wl = 0; 474 475 std::string scw; 476 std::vector<w_char> sunicw; 477 478 // input conversion 479 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL; 480 { 481 std::string wspace; 482 483 bool convstatus = rl ? rl->conv(word, wspace) : false; 484 if (convstatus) 485 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); 486 else 487 wl = cleanword2(scw, sunicw, word, &captype, &abbv); 488 } 489 490 #ifdef MOZILLA_CLIENT 491 // accept the abbreviated words without dots 492 // workaround for the incomplete tokenization of Mozilla 493 abbv = 1; 494 #endif 495 496 if (wl == 0 || m_HMgrs.empty()) 497 return true; 498 if (root) 499 root->clear(); 500 501 // allow numbers with dots, dashes and commas (but forbid double separators: 502 // "..", "--" etc.) 503 enum { NBEGIN, NNUM, NSEP }; 504 int nstate = NBEGIN; 505 size_t i; 506 507 for (i = 0; (i < wl); i++) { 508 if ((scw[i] <= '9') && (scw[i] >= '0')) { 509 nstate = NNUM; 510 } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) { 511 if ((nstate == NSEP) || (i == 0)) 512 break; 513 nstate = NSEP; 514 } else 515 break; 516 } 517 if ((i == wl) && (nstate == NNUM)) 518 return true; 519 520 switch (captype) { 521 case HUHCAP: 522 /* FALLTHROUGH */ 523 case HUHINITCAP: 524 *info |= SPELL_ORIGCAP; 525 /* FALLTHROUGH */ 526 case NOCAP: 527 rv = checkword(scw, info, root); 528 if ((abbv) && !(rv)) { 529 std::string u8buffer(scw); 530 u8buffer.push_back('.'); 531 rv = checkword(u8buffer, info, root); 532 } 533 break; 534 case ALLCAP: { 535 *info |= SPELL_ORIGCAP; 536 rv = checkword(scw, info, root); 537 if (rv) 538 break; 539 if (abbv) { 540 std::string u8buffer(scw); 541 u8buffer.push_back('.'); 542 rv = checkword(u8buffer, info, root); 543 if (rv) 544 break; 545 } 546 // Spec. prefix handling for Catalan, French, Italian: 547 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). 548 size_t apos = pAMgr ? scw.find('\'') : std::string::npos; 549 if (apos != std::string::npos) { 550 mkallsmall2(scw, sunicw); 551 //conversion may result in string with different len to pre-mkallsmall2 552 //so re-scan 553 if (apos != std::string::npos && apos < scw.size() - 1) { 554 std::string part1 = scw.substr(0, apos+1); 555 std::string part2 = scw.substr(apos+1); 556 if (utf8) { 557 std::vector<w_char> part1u, part2u; 558 u8_u16(part1u, part1); 559 u8_u16(part2u, part2); 560 mkinitcap2(part2, part2u); 561 scw = part1 + part2; 562 sunicw = part1u; 563 sunicw.insert(sunicw.end(), part2u.begin(), part2u.end()); 564 rv = checkword(scw, info, root); 565 if (rv) 566 break; 567 } else { 568 mkinitcap2(part2, sunicw); 569 scw = part1 + part2; 570 rv = checkword(scw, info, root); 571 if (rv) 572 break; 573 } 574 mkinitcap2(scw, sunicw); 575 rv = checkword(scw, info, root); 576 if (rv) 577 break; 578 } 579 } 580 if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) { 581 582 mkallsmall2(scw, sunicw); 583 std::string u8buffer(scw); 584 rv = spellsharps(u8buffer, 0, 0, 0, info, root); 585 if (!rv) { 586 mkinitcap2(scw, sunicw); 587 rv = spellsharps(scw, 0, 0, 0, info, root); 588 } 589 if ((abbv) && !(rv)) { 590 u8buffer.push_back('.'); 591 rv = spellsharps(u8buffer, 0, 0, 0, info, root); 592 if (!rv) { 593 u8buffer = std::string(scw); 594 u8buffer.push_back('.'); 595 rv = spellsharps(u8buffer, 0, 0, 0, info, root); 596 } 597 } 598 if (rv) 599 break; 600 } 601 } 602 /* FALLTHROUGH */ 603 case INITCAP: { 604 // handle special capitalization of dotted I 605 bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0); 606 *info |= SPELL_ORIGCAP; 607 if (captype == ALLCAP) { 608 mkallsmall2(scw, sunicw); 609 mkinitcap2(scw, sunicw); 610 if (Idot) 611 scw.replace(0, 1, "\xc4\xb0"); 612 } 613 if (captype == INITCAP) 614 *info |= SPELL_INITCAP; 615 rv = checkword(scw, info, root); 616 if (captype == INITCAP) 617 *info &= ~SPELL_INITCAP; 618 // forbid bad capitalization 619 // (for example, ijs -> Ijs instead of IJs in Dutch) 620 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) 621 if (*info & SPELL_FORBIDDEN) { 622 rv = NULL; 623 break; 624 } 625 if (rv && is_keepcase(rv) && (captype == ALLCAP)) 626 rv = NULL; 627 if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh)) 628 break; 629 630 mkallsmall2(scw, sunicw); 631 std::string u8buffer(scw); 632 mkinitcap2(scw, sunicw); 633 634 rv = checkword(u8buffer, info, root); 635 if (abbv && !rv) { 636 u8buffer.push_back('.'); 637 rv = checkword(u8buffer, info, root); 638 if (!rv) { 639 u8buffer = scw; 640 u8buffer.push_back('.'); 641 if (captype == INITCAP) 642 *info |= SPELL_INITCAP; 643 rv = checkword(u8buffer, info, root); 644 if (captype == INITCAP) 645 *info &= ~SPELL_INITCAP; 646 if (rv && is_keepcase(rv) && (captype == ALLCAP)) 647 rv = NULL; 648 break; 649 } 650 } 651 if (rv && is_keepcase(rv) && 652 ((captype == ALLCAP) || 653 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed 654 // in INITCAP form, too. 655 !(pAMgr->get_checksharps() && 656 ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) || 657 (!utf8 && u8buffer.find('\xDF') != std::string::npos))))) 658 rv = NULL; 659 break; 660 } 661 } 662 663 if (rv) { 664 if (pAMgr && pAMgr->get_warn() && rv->astr && 665 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { 666 *info |= SPELL_WARN; 667 if (pAMgr->get_forbidwarn()) 668 return false; 669 return true; 670 } 671 return true; 672 } 673 674 // recursive breaking at break points 675 if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) { 676 677 int nbr = 0; 678 wl = scw.size(); 679 680 // calculate break points for recursion limit 681 for (size_t j = 0; j < wordbreak.size(); ++j) { 682 size_t pos = 0; 683 while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) { 684 ++nbr; 685 pos += wordbreak[j].size(); 686 } 687 } 688 if (nbr >= 10) 689 return false; 690 691 // check boundary patterns (^begin and end$) 692 for (size_t j = 0; j < wordbreak.size(); ++j) { 693 size_t plen = wordbreak[j].size(); 694 if (plen == 1 || plen > wl) 695 continue; 696 697 if (wordbreak[j][0] == '^' && 698 scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1))) 699 return true; 700 701 if (wordbreak[j][plen - 1] == '$' && 702 scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) { 703 std::string suffix(scw.substr(wl - plen + 1)); 704 scw.resize(wl - plen + 1); 705 if (spell(scw)) 706 return true; 707 scw.append(suffix); 708 } 709 } 710 711 // other patterns 712 for (size_t j = 0; j < wordbreak.size(); ++j) { 713 size_t plen = wordbreak[j].size(); 714 size_t found = scw.find(wordbreak[j]); 715 if ((found > 0) && (found < wl - plen)) { 716 size_t found2 = scw.find(wordbreak[j], found + 1); 717 // try to break at the second occurance 718 // to recognize dictionary words with wordbreak 719 if (found2 > 0 && (found2 < wl - plen)) 720 found = found2; 721 if (!spell(scw.substr(found + plen))) 722 continue; 723 std::string suffix(scw.substr(found)); 724 scw.resize(found); 725 // examine 2 sides of the break point 726 if (spell(scw)) 727 return true; 728 scw.append(suffix); 729 730 // LANG_hu: spec. dash rule 731 if (langnum == LANG_hu && wordbreak[j] == "-") { 732 suffix = scw.substr(found + 1); 733 scw.resize(found + 1); 734 if (spell(scw)) 735 return true; // check the first part with dash 736 scw.append(suffix); 737 } 738 // end of LANG specific region 739 } 740 } 741 742 // other patterns (break at first break point) 743 for (size_t j = 0; j < wordbreak.size(); ++j) { 744 size_t plen = wordbreak[j].size(); 745 size_t found = scw.find(wordbreak[j]); 746 if ((found > 0) && (found < wl - plen)) { 747 if (!spell(scw.substr(found + plen))) 748 continue; 749 std::string suffix(scw.substr(found)); 750 scw.resize(found); 751 // examine 2 sides of the break point 752 if (spell(scw)) 753 return true; 754 scw.append(suffix); 755 756 // LANG_hu: spec. dash rule 757 if (langnum == LANG_hu && wordbreak[j] == "-") { 758 suffix = scw.substr(found + 1); 759 scw.resize(found + 1); 760 if (spell(scw)) 761 return true; // check the first part with dash 762 scw.append(suffix); 763 } 764 // end of LANG specific region 765 } 766 } 767 } 768 769 return false; 770 } 771 772 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) { 773 std::string w2; 774 const char* word; 775 int len; 776 777 // remove IGNORE characters from the string 778 clean_ignore(w2, w); 779 780 word = w2.c_str(); 781 len = w2.size(); 782 783 if (!len) 784 return NULL; 785 786 // word reversing wrapper for complex prefixes 787 if (complexprefixes) { 788 if (utf8) 789 reverseword_utf(w2); 790 else 791 reverseword(w2); 792 } 793 794 word = w2.c_str(); 795 796 // look word in hash table 797 struct hentry* he = NULL; 798 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { 799 he = m_HMgrs[i]->lookup(word); 800 801 // check forbidden and onlyincompound words 802 if ((he) && (he->astr) && (pAMgr) && 803 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { 804 if (info) 805 *info |= SPELL_FORBIDDEN; 806 // LANG_hu section: set dash information for suggestions 807 if (langnum == LANG_hu) { 808 if (pAMgr->get_compoundflag() && 809 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { 810 if (info) 811 *info |= SPELL_COMPOUND; 812 } 813 } 814 return NULL; 815 } 816 817 // he = next not needaffix, onlyincompound homonym or onlyupcase word 818 while (he && (he->astr) && pAMgr && 819 ((pAMgr->get_needaffix() && 820 TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) || 821 (pAMgr->get_onlyincompound() && 822 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || 823 (info && (*info & SPELL_INITCAP) && 824 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) 825 he = he->next_homonym; 826 } 827 828 // check with affixes 829 if (!he && pAMgr) { 830 // try stripping off affixes */ 831 he = pAMgr->affix_check(word, len, 0); 832 833 // check compound restriction and onlyupcase 834 if (he && he->astr && 835 ((pAMgr->get_onlyincompound() && 836 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || 837 (info && (*info & SPELL_INITCAP) && 838 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) { 839 he = NULL; 840 } 841 842 if (he) { 843 if ((he->astr) && (pAMgr) && 844 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { 845 if (info) 846 *info |= SPELL_FORBIDDEN; 847 return NULL; 848 } 849 if (root) { 850 root->assign(he->word); 851 if (complexprefixes) { 852 if (utf8) 853 reverseword_utf(*root); 854 else 855 reverseword(*root); 856 } 857 } 858 // try check compound word 859 } else if (pAMgr->get_compound()) { 860 struct hentry* rwords[100]; // buffer for COMPOUND pattern checking 861 he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info); 862 // LANG_hu section: `moving rule' with last dash 863 if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) { 864 std::string dup(word, len - 1); 865 he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info); 866 } 867 // end of LANG specific region 868 if (he) { 869 if (root) { 870 root->assign(he->word); 871 if (complexprefixes) { 872 if (utf8) 873 reverseword_utf(*root); 874 else 875 reverseword(*root); 876 } 877 } 878 if (info) 879 *info |= SPELL_COMPOUND; 880 } 881 } 882 } 883 884 return he; 885 } 886 887 std::vector<std::string> HunspellImpl::suggest(const std::string& word) { 888 bool capwords; 889 size_t abbv; 890 int captype; 891 std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype); 892 // word reversing wrapper for complex prefixes 893 if (complexprefixes) { 894 for (size_t j = 0; j < slst.size(); ++j) { 895 if (utf8) 896 reverseword_utf(slst[j]); 897 else 898 reverseword(slst[j]); 899 } 900 } 901 902 // capitalize 903 if (capwords) 904 for (size_t j = 0; j < slst.size(); ++j) { 905 mkinitcap(slst[j]); 906 } 907 908 // expand suggestions with dot(s) 909 if (abbv && pAMgr && pAMgr->get_sugswithdots()) { 910 for (size_t j = 0; j < slst.size(); ++j) { 911 slst[j].append(word.substr(word.size() - abbv)); 912 } 913 } 914 915 // remove bad capitalized and forbidden forms 916 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { 917 switch (captype) { 918 case INITCAP: 919 case ALLCAP: { 920 size_t l = 0; 921 for (size_t j = 0; j < slst.size(); ++j) { 922 if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { 923 std::string s; 924 std::vector<w_char> w; 925 if (utf8) { 926 u8_u16(w, slst[j]); 927 } else { 928 s = slst[j]; 929 } 930 mkallsmall2(s, w); 931 if (spell(s)) { 932 slst[l] = s; 933 ++l; 934 } else { 935 mkinitcap2(s, w); 936 if (spell(s)) { 937 slst[l] = s; 938 ++l; 939 } 940 } 941 } else { 942 slst[l] = slst[j]; 943 ++l; 944 } 945 } 946 slst.resize(l); 947 } 948 } 949 } 950 951 // remove duplications 952 size_t l = 0; 953 for (size_t j = 0; j < slst.size(); ++j) { 954 slst[l] = slst[j]; 955 for (size_t k = 0; k < l; ++k) { 956 if (slst[k] == slst[j]) { 957 --l; 958 break; 959 } 960 } 961 ++l; 962 } 963 slst.resize(l); 964 965 // output conversion 966 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; 967 if (rl) { 968 for (size_t i = 0; rl && i < slst.size(); ++i) { 969 std::string wspace; 970 if (rl->conv(slst[i], wspace)) { 971 slst[i] = wspace; 972 } 973 } 974 } 975 return slst; 976 } 977 978 std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word, 979 bool& capwords, size_t& abbv, int& captype) { 980 captype = NOCAP; 981 abbv = 0; 982 capwords = false; 983 984 std::vector<std::string> slst; 985 986 int onlycmpdsug = 0; 987 if (!pSMgr || m_HMgrs.empty()) 988 return slst; 989 990 // process XML input of the simplified API (see manual) 991 if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { 992 return spellml(word); 993 } 994 if (utf8) { 995 if (word.size() >= MAXWORDUTF8LEN) 996 return slst; 997 } else { 998 if (word.size() >= MAXWORDLEN) 999 return slst; 1000 } 1001 size_t wl = 0; 1002 1003 std::string scw; 1004 std::vector<w_char> sunicw; 1005 1006 // input conversion 1007 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; 1008 { 1009 std::string wspace; 1010 1011 bool convstatus = rl ? rl->conv(word, wspace) : false; 1012 if (convstatus) 1013 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); 1014 else 1015 wl = cleanword2(scw, sunicw, word, &captype, &abbv); 1016 1017 if (wl == 0) 1018 return slst; 1019 } 1020 1021 bool good = false; 1022 1023 clock_t timelimit; 1024 // initialize in every suggestion call 1025 timelimit = clock(); 1026 1027 // check capitalized form for FORCEUCASE 1028 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { 1029 int info = SPELL_ORIGCAP; 1030 if (checkword(scw, &info, NULL)) { 1031 std::string form(scw); 1032 mkinitcap(form); 1033 slst.push_back(form); 1034 return slst; 1035 } 1036 } 1037 1038 switch (captype) { 1039 case NOCAP: { 1040 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); 1041 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1042 return slst; 1043 if (abbv) { 1044 std::string wspace(scw); 1045 wspace.push_back('.'); 1046 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1047 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1048 return slst; 1049 } 1050 break; 1051 } 1052 1053 case INITCAP: { 1054 capwords = true; 1055 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); 1056 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1057 return slst; 1058 std::string wspace(scw); 1059 mkallsmall2(wspace, sunicw); 1060 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1061 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1062 return slst; 1063 break; 1064 } 1065 case HUHINITCAP: 1066 capwords = true; 1067 /* FALLTHROUGH */ 1068 case HUHCAP: { 1069 good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); 1070 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1071 return slst; 1072 // something.The -> something. The 1073 size_t dot_pos = scw.find('.'); 1074 if (dot_pos != std::string::npos) { 1075 std::string postdot = scw.substr(dot_pos + 1); 1076 int captype_; 1077 if (utf8) { 1078 std::vector<w_char> postdotu; 1079 u8_u16(postdotu, postdot); 1080 captype_ = get_captype_utf8(postdotu, langnum); 1081 } else { 1082 captype_ = get_captype(postdot, csconv); 1083 } 1084 if (captype_ == INITCAP) { 1085 std::string str(scw); 1086 str.insert(dot_pos + 1, 1, ' '); 1087 insert_sug(slst, str); 1088 } 1089 } 1090 1091 std::string wspace; 1092 1093 if (captype == HUHINITCAP) { 1094 // TheOpenOffice.org -> The OpenOffice.org 1095 wspace = scw; 1096 mkinitsmall2(wspace, sunicw); 1097 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1098 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1099 return slst; 1100 } 1101 wspace = scw; 1102 mkallsmall2(wspace, sunicw); 1103 if (spell(wspace.c_str())) 1104 insert_sug(slst, wspace); 1105 size_t prevns = slst.size(); 1106 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1107 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1108 return slst; 1109 if (captype == HUHINITCAP) { 1110 mkinitcap2(wspace, sunicw); 1111 if (spell(wspace.c_str())) 1112 insert_sug(slst, wspace); 1113 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1114 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1115 return slst; 1116 } 1117 // aNew -> "a New" (instead of "a new") 1118 for (size_t j = prevns; j < slst.size(); ++j) { 1119 const char* space = strchr(slst[j].c_str(), ' '); 1120 if (space) { 1121 size_t slen = strlen(space + 1); 1122 // different case after space (need capitalisation) 1123 if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { 1124 std::string first(slst[j].c_str(), space + 1); 1125 std::string second(space + 1); 1126 std::vector<w_char> w; 1127 if (utf8) 1128 u8_u16(w, second); 1129 mkinitcap2(second, w); 1130 // set as first suggestion 1131 slst.erase(slst.begin() + j); 1132 slst.insert(slst.begin(), first + second); 1133 } 1134 } 1135 } 1136 break; 1137 } 1138 1139 case ALLCAP: { 1140 std::string wspace(scw); 1141 mkallsmall2(wspace, sunicw); 1142 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1143 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1144 return slst; 1145 if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) 1146 insert_sug(slst, wspace); 1147 mkinitcap2(wspace, sunicw); 1148 good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); 1149 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1150 return slst; 1151 for (size_t j = 0; j < slst.size(); ++j) { 1152 mkallcap(slst[j]); 1153 if (pAMgr && pAMgr->get_checksharps()) { 1154 if (utf8) { 1155 mystrrep(slst[j], "\xC3\x9F", "SS"); 1156 } else { 1157 mystrrep(slst[j], "\xDF", "SS"); 1158 } 1159 } 1160 } 1161 break; 1162 } 1163 } 1164 1165 // LANG_hu section: replace '-' with ' ' in Hungarian 1166 if (langnum == LANG_hu) { 1167 for (size_t j = 0; j < slst.size(); ++j) { 1168 size_t pos = slst[j].find('-'); 1169 if (pos != std::string::npos) { 1170 int info; 1171 std::string w(slst[j].substr(0, pos)); 1172 w.append(slst[j].substr(pos + 1)); 1173 (void)spell(w, &info, NULL); 1174 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { 1175 slst[j][pos] = ' '; 1176 } else 1177 slst[j][pos] = '-'; 1178 } 1179 } 1180 } 1181 // END OF LANG_hu section 1182 // try ngram approach since found nothing good suggestion 1183 if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { 1184 switch (captype) { 1185 case NOCAP: { 1186 pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); 1187 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1188 return slst; 1189 break; 1190 } 1191 /* FALLTHROUGH */ 1192 case HUHINITCAP: 1193 capwords = true; 1194 /* FALLTHROUGH */ 1195 case HUHCAP: { 1196 std::string wspace(scw); 1197 mkallsmall2(wspace, sunicw); 1198 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); 1199 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1200 return slst; 1201 break; 1202 } 1203 case INITCAP: { 1204 capwords = true; 1205 std::string wspace(scw); 1206 mkallsmall2(wspace, sunicw); 1207 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); 1208 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1209 return slst; 1210 break; 1211 } 1212 case ALLCAP: { 1213 std::string wspace(scw); 1214 mkallsmall2(wspace, sunicw); 1215 size_t oldns = slst.size(); 1216 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); 1217 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1218 return slst; 1219 for (size_t j = oldns; j < slst.size(); ++j) { 1220 mkallcap(slst[j]); 1221 } 1222 break; 1223 } 1224 } 1225 } 1226 1227 // try dash suggestion (Afo-American -> Afro-American) 1228 // Note: LibreOffice was modified to treat dashes as word 1229 // characters to check "scot-free" etc. word forms, but 1230 // we need to handle suggestions for "Afo-American", etc., 1231 // while "Afro-American" is missing from the dictionary. 1232 // TODO avoid possible overgeneration 1233 size_t dash_pos = scw.find('-'); 1234 if (dash_pos != std::string::npos) { 1235 int nodashsug = 1; 1236 for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) { 1237 if (slst[j].find('-') != std::string::npos) 1238 nodashsug = 0; 1239 } 1240 1241 size_t prev_pos = 0; 1242 bool last = false; 1243 1244 while (!good && nodashsug && !last) { 1245 if (dash_pos == scw.size()) 1246 last = 1; 1247 std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); 1248 if (!spell(chunk.c_str())) { 1249 std::vector<std::string> nlst = suggest(chunk.c_str()); 1250 if (clock() > timelimit + TIMELIMIT_GLOBAL) 1251 return slst; 1252 for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { 1253 std::string wspace = scw.substr(0, prev_pos); 1254 wspace.append(*j); 1255 if (!last) { 1256 wspace.append("-"); 1257 wspace.append(scw.substr(dash_pos + 1)); 1258 } 1259 int info = 0; 1260 if (pAMgr && pAMgr->get_forbiddenword()) 1261 checkword(wspace, &info, NULL); 1262 if (!(info & SPELL_FORBIDDEN)) 1263 insert_sug(slst, wspace); 1264 } 1265 nodashsug = 0; 1266 } 1267 if (!last) { 1268 prev_pos = dash_pos + 1; 1269 dash_pos = scw.find('-', prev_pos); 1270 } 1271 if (dash_pos == std::string::npos) 1272 dash_pos = scw.size(); 1273 } 1274 } 1275 return slst; 1276 } 1277 1278 const std::string& HunspellImpl::get_dict_encoding() const { 1279 return encoding; 1280 } 1281 1282 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) { 1283 std::vector<std::string> slst; 1284 1285 std::string result2; 1286 if (desc.empty()) 1287 return slst; 1288 for (size_t i = 0; i < desc.size(); ++i) { 1289 1290 std::string result; 1291 1292 // add compound word parts (except the last one) 1293 const char* s = desc[i].c_str(); 1294 const char* part = strstr(s, MORPH_PART); 1295 if (part) { 1296 const char* nextpart = strstr(part + 1, MORPH_PART); 1297 while (nextpart) { 1298 std::string field; 1299 copy_field(field, part, MORPH_PART); 1300 result.append(field); 1301 part = nextpart; 1302 nextpart = strstr(part + 1, MORPH_PART); 1303 } 1304 s = part; 1305 } 1306 1307 std::string tok(s); 1308 size_t alt = 0; 1309 while ((alt = tok.find(" | ", alt)) != std::string::npos) { 1310 tok[alt + 1] = MSEP_ALT; 1311 } 1312 std::vector<std::string> pl = line_tok(tok, MSEP_ALT); 1313 for (size_t k = 0; k < pl.size(); ++k) { 1314 // add derivational suffixes 1315 if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) { 1316 // remove inflectional suffixes 1317 const size_t is = pl[k].find(MORPH_INFL_SFX); 1318 if (is != std::string::npos) 1319 pl[k].resize(is); 1320 std::vector<std::string> singlepl; 1321 singlepl.push_back(pl[k]); 1322 std::string sg = pSMgr->suggest_gen(singlepl, pl[k]); 1323 if (!sg.empty()) { 1324 std::vector<std::string> gen = line_tok(sg, MSEP_REC); 1325 for (size_t j = 0; j < gen.size(); ++j) { 1326 result2.push_back(MSEP_REC); 1327 result2.append(result); 1328 result2.append(gen[j]); 1329 } 1330 } 1331 } else { 1332 result2.push_back(MSEP_REC); 1333 result2.append(result); 1334 if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) { 1335 std::string field; 1336 copy_field(field, pl[k], MORPH_SURF_PFX); 1337 result2.append(field); 1338 } 1339 std::string field; 1340 copy_field(field, pl[k], MORPH_STEM); 1341 result2.append(field); 1342 } 1343 } 1344 } 1345 slst = line_tok(result2, MSEP_REC); 1346 uniqlist(slst); 1347 return slst; 1348 } 1349 1350 std::vector<std::string> HunspellImpl::stem(const std::string& word) { 1351 return stem(analyze(word)); 1352 } 1353 1354 const std::string& HunspellImpl::get_wordchars_cpp() const { 1355 return pAMgr->get_wordchars(); 1356 } 1357 1358 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const { 1359 return pAMgr->get_wordchars_utf16(); 1360 } 1361 1362 void HunspellImpl::mkinitcap(std::string& u8) { 1363 if (utf8) { 1364 std::vector<w_char> u16; 1365 u8_u16(u16, u8); 1366 ::mkinitcap_utf(u16, langnum); 1367 u16_u8(u8, u16); 1368 } else { 1369 ::mkinitcap(u8, csconv); 1370 } 1371 } 1372 1373 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { 1374 if (utf8) { 1375 ::mkinitcap_utf(u16, langnum); 1376 u16_u8(u8, u16); 1377 } else { 1378 ::mkinitcap(u8, csconv); 1379 } 1380 return u8.size(); 1381 } 1382 1383 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { 1384 if (utf8) { 1385 ::mkinitsmall_utf(u16, langnum); 1386 u16_u8(u8, u16); 1387 } else { 1388 ::mkinitsmall(u8, csconv); 1389 } 1390 return u8.size(); 1391 } 1392 1393 int HunspellImpl::add(const std::string& word) { 1394 if (!m_HMgrs.empty()) 1395 return m_HMgrs[0]->add(word); 1396 return 0; 1397 } 1398 1399 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) { 1400 if (!m_HMgrs.empty()) 1401 return m_HMgrs[0]->add_with_affix(word, example); 1402 return 0; 1403 } 1404 1405 int HunspellImpl::remove(const std::string& word) { 1406 if (!m_HMgrs.empty()) 1407 return m_HMgrs[0]->remove(word); 1408 return 0; 1409 } 1410 1411 const std::string& HunspellImpl::get_version_cpp() const { 1412 return pAMgr->get_version(); 1413 } 1414 1415 struct cs_info* HunspellImpl::get_csconv() { 1416 return csconv; 1417 } 1418 1419 void HunspellImpl::cat_result(std::string& result, const std::string& st) { 1420 if (!st.empty()) { 1421 if (!result.empty()) 1422 result.append("\n"); 1423 result.append(st); 1424 } 1425 } 1426 1427 std::vector<std::string> HunspellImpl::analyze(const std::string& word) { 1428 std::vector<std::string> slst = analyze_internal(word); 1429 // output conversion 1430 RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; 1431 if (rl) { 1432 for (size_t i = 0; rl && i < slst.size(); ++i) { 1433 std::string wspace; 1434 if (rl->conv(slst[i], wspace)) { 1435 slst[i] = wspace; 1436 } 1437 } 1438 } 1439 return slst; 1440 } 1441 1442 std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) { 1443 std::vector<std::string> slst; 1444 if (!pSMgr || m_HMgrs.empty()) 1445 return slst; 1446 if (utf8) { 1447 if (word.size() >= MAXWORDUTF8LEN) 1448 return slst; 1449 } else { 1450 if (word.size() >= MAXWORDLEN) 1451 return slst; 1452 } 1453 int captype = NOCAP; 1454 size_t abbv = 0; 1455 size_t wl = 0; 1456 1457 std::string scw; 1458 std::vector<w_char> sunicw; 1459 1460 // input conversion 1461 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; 1462 { 1463 std::string wspace; 1464 1465 bool convstatus = rl ? rl->conv(word, wspace) : false; 1466 if (convstatus) 1467 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); 1468 else 1469 wl = cleanword2(scw, sunicw, word, &captype, &abbv); 1470 } 1471 1472 if (wl == 0) { 1473 if (abbv) { 1474 scw.clear(); 1475 for (wl = 0; wl < abbv; wl++) 1476 scw.push_back('.'); 1477 abbv = 0; 1478 } else 1479 return slst; 1480 } 1481 1482 std::string result; 1483 1484 size_t n = 0; 1485 // test numbers 1486 // LANG_hu section: set dash information for suggestions 1487 if (langnum == LANG_hu) { 1488 size_t n2 = 0; 1489 size_t n3 = 0; 1490 1491 while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) || 1492 (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) { 1493 n++; 1494 if ((scw[n] == '.') || (scw[n] == ',')) { 1495 if (((n2 == 0) && (n > 3)) || 1496 ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ',')))) 1497 break; 1498 n2++; 1499 n3 = n; 1500 } 1501 } 1502 1503 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) 1504 return slst; 1505 if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) && 1506 checkword(scw.substr(n), NULL, NULL))) { 1507 result.append(scw); 1508 result.resize(n - 1); 1509 if (n == wl) 1510 cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1))); 1511 else { 1512 std::string chunk = scw.substr(n - 1, 1); 1513 cat_result(result, pSMgr->suggest_morph(chunk)); 1514 result.push_back('+'); // XXX SPEC. MORPHCODE 1515 cat_result(result, pSMgr->suggest_morph(scw.substr(n))); 1516 } 1517 return line_tok(result, MSEP_REC); 1518 } 1519 } 1520 // END OF LANG_hu section 1521 1522 switch (captype) { 1523 case HUHCAP: 1524 case HUHINITCAP: 1525 case NOCAP: { 1526 cat_result(result, pSMgr->suggest_morph(scw)); 1527 if (abbv) { 1528 std::string u8buffer(scw); 1529 u8buffer.push_back('.'); 1530 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1531 } 1532 break; 1533 } 1534 case INITCAP: { 1535 mkallsmall2(scw, sunicw); 1536 std::string u8buffer(scw); 1537 mkinitcap2(scw, sunicw); 1538 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1539 cat_result(result, pSMgr->suggest_morph(scw)); 1540 if (abbv) { 1541 u8buffer.push_back('.'); 1542 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1543 1544 u8buffer = scw; 1545 u8buffer.push_back('.'); 1546 1547 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1548 } 1549 break; 1550 } 1551 case ALLCAP: { 1552 cat_result(result, pSMgr->suggest_morph(scw)); 1553 if (abbv) { 1554 std::string u8buffer(scw); 1555 u8buffer.push_back('.'); 1556 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1557 } 1558 mkallsmall2(scw, sunicw); 1559 std::string u8buffer(scw); 1560 mkinitcap2(scw, sunicw); 1561 1562 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1563 cat_result(result, pSMgr->suggest_morph(scw)); 1564 if (abbv) { 1565 u8buffer.push_back('.'); 1566 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1567 1568 u8buffer = scw; 1569 u8buffer.push_back('.'); 1570 1571 cat_result(result, pSMgr->suggest_morph(u8buffer)); 1572 } 1573 break; 1574 } 1575 } 1576 1577 if (!result.empty()) { 1578 // word reversing wrapper for complex prefixes 1579 if (complexprefixes) { 1580 if (utf8) 1581 reverseword_utf(result); 1582 else 1583 reverseword(result); 1584 } 1585 return line_tok(result, MSEP_REC); 1586 } 1587 1588 // compound word with dash (HU) I18n 1589 // LANG_hu section: set dash information for suggestions 1590 1591 size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos; 1592 if (dash_pos != std::string::npos) { 1593 int nresult = 0; 1594 1595 std::string part1 = scw.substr(0, dash_pos); 1596 std::string part2 = scw.substr(dash_pos+1); 1597 1598 // examine 2 sides of the dash 1599 if (part2.empty()) { // base word ending with dash 1600 if (spell(part1)) { 1601 std::string p = pSMgr->suggest_morph(part1); 1602 if (!p.empty()) { 1603 slst = line_tok(p, MSEP_REC); 1604 return slst; 1605 } 1606 } 1607 } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat. 1608 if (spell(part1) && (spell("-e"))) { 1609 std::string st = pSMgr->suggest_morph(part1); 1610 if (!st.empty()) { 1611 result.append(st); 1612 } 1613 result.push_back('+'); // XXX spec. separator in MORPHCODE 1614 st = pSMgr->suggest_morph("-e"); 1615 if (!st.empty()) { 1616 result.append(st); 1617 } 1618 return line_tok(result, MSEP_REC); 1619 } 1620 } else { 1621 // first word ending with dash: word- XXX ??? 1622 part1.push_back(' '); 1623 nresult = spell(part1); 1624 part1.erase(part1.size() - 1); 1625 if (nresult && spell(part2) && 1626 ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) { 1627 std::string st = pSMgr->suggest_morph(part1); 1628 if (!st.empty()) { 1629 result.append(st); 1630 result.push_back('+'); // XXX spec. separator in MORPHCODE 1631 } 1632 st = pSMgr->suggest_morph(part2); 1633 if (!st.empty()) { 1634 result.append(st); 1635 } 1636 return line_tok(result, MSEP_REC); 1637 } 1638 } 1639 // affixed number in correct word 1640 if (nresult && (dash_pos > 0) && 1641 (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) || 1642 (scw[dash_pos - 1] == '.'))) { 1643 n = 1; 1644 if (scw[dash_pos - n] == '.') 1645 n++; 1646 // search first not a number character to left from dash 1647 while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) && 1648 (n < 6)) { 1649 n++; 1650 } 1651 if (dash_pos < n) 1652 n--; 1653 // numbers: valami1000000-hoz 1654 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, 1655 // 56-hoz, 6-hoz 1656 for (; n >= 1; n--) { 1657 if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') { 1658 continue; 1659 } 1660 std::string chunk = scw.substr(dash_pos - n); 1661 if (checkword(chunk, NULL, NULL)) { 1662 result.append(chunk); 1663 std::string st = pSMgr->suggest_morph(chunk); 1664 if (!st.empty()) { 1665 result.append(st); 1666 } 1667 return line_tok(result, MSEP_REC); 1668 } 1669 } 1670 } 1671 } 1672 return slst; 1673 } 1674 1675 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) { 1676 std::vector<std::string> slst; 1677 if (!pSMgr || pl.empty()) 1678 return slst; 1679 std::vector<std::string> pl2 = analyze(word); 1680 int captype = NOCAP; 1681 int abbv = 0; 1682 std::string cw; 1683 cleanword(cw, word, &captype, &abbv); 1684 std::string result; 1685 1686 for (size_t i = 0; i < pl.size(); ++i) { 1687 cat_result(result, pSMgr->suggest_gen(pl2, pl[i])); 1688 } 1689 1690 if (!result.empty()) { 1691 // allcap 1692 if (captype == ALLCAP) 1693 mkallcap(result); 1694 1695 // line split 1696 slst = line_tok(result, MSEP_REC); 1697 1698 // capitalize 1699 if (captype == INITCAP || captype == HUHINITCAP) { 1700 for (size_t j = 0; j < slst.size(); ++j) { 1701 mkinitcap(slst[j]); 1702 } 1703 } 1704 1705 // temporary filtering of prefix related errors (eg. 1706 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks") 1707 std::vector<std::string>::iterator it = slst.begin(); 1708 while (it != slst.end()) { 1709 if (!spell(*it)) { 1710 it = slst.erase(it); 1711 } else { 1712 ++it; 1713 } 1714 } 1715 } 1716 return slst; 1717 } 1718 1719 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) { 1720 std::vector<std::string> pl = analyze(pattern); 1721 std::vector<std::string> slst = generate(word, pl); 1722 uniqlist(slst); 1723 return slst; 1724 } 1725 1726 // minimal XML parser functions 1727 std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) { 1728 std::string dest; 1729 if (pos == std::string::npos) 1730 return dest; 1731 const char* par = in_par.c_str() + pos; 1732 char end = *par; 1733 if (end == '>') 1734 end = '<'; 1735 else if (end != '\'' && end != '"') 1736 return dest; // bad XML 1737 for (par++; *par != '\0' && *par != end; ++par) { 1738 dest.push_back(*par); 1739 } 1740 mystrrep(dest, "<", "<"); 1741 mystrrep(dest, "&", "&"); 1742 return dest; 1743 } 1744 1745 int HunspellImpl::get_langnum() const { 1746 return langnum; 1747 } 1748 1749 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { 1750 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL; 1751 if (rl) { 1752 return rl->conv(word, dest); 1753 } 1754 dest.assign(word); 1755 return false; 1756 } 1757 1758 // return the beginning of the element (attr == NULL) or the attribute 1759 std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) { 1760 if (pos == std::string::npos) 1761 return std::string::npos; 1762 1763 std::string::size_type endpos = s.find('>', pos); 1764 if (attr == NULL) 1765 return endpos; 1766 while (true) { 1767 pos = s.find(attr, pos); 1768 if (pos == std::string::npos || pos >= endpos) 1769 return std::string::npos; 1770 if (s[pos - 1] == ' ' || s[pos - 1] == '\n') 1771 break; 1772 pos += strlen(attr); 1773 } 1774 return pos + strlen(attr); 1775 } 1776 1777 int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos, 1778 const char* attr, 1779 const char* value) { 1780 std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr)); 1781 if (cw == value) 1782 return 1; 1783 return 0; 1784 } 1785 1786 std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) { 1787 std::vector<std::string> slst; 1788 if (pos == std::string::npos) 1789 return slst; 1790 while (true) { 1791 pos = list.find(tag, pos); 1792 if (pos == std::string::npos) 1793 break; 1794 std::string cw = get_xml_par(list, pos + strlen(tag) - 1); 1795 if (cw.empty()) { 1796 break; 1797 } 1798 slst.push_back(cw); 1799 ++pos; 1800 } 1801 return slst; 1802 } 1803 1804 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) { 1805 std::vector<std::string> slst; 1806 1807 std::string::size_type qpos = in_word.find("<query"); 1808 if (qpos == std::string::npos) 1809 return slst; // bad XML input 1810 1811 std::string::size_type q2pos = in_word.find('>', qpos); 1812 if (q2pos == std::string::npos) 1813 return slst; // bad XML input 1814 1815 q2pos = in_word.find("<word", q2pos); 1816 if (q2pos == std::string::npos) 1817 return slst; // bad XML input 1818 1819 if (check_xml_par(in_word, qpos, "type=", "analyze")) { 1820 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); 1821 if (!cw.empty()) 1822 slst = analyze(cw); 1823 if (slst.empty()) 1824 return slst; 1825 // convert the result to <code><a>ana1</a><a>ana2</a></code> format 1826 std::string r; 1827 r.append("<code>"); 1828 for (size_t i = 0; i < slst.size(); ++i) { 1829 r.append("<a>"); 1830 1831 std::string entry(slst[i]); 1832 mystrrep(entry, "\t", " "); 1833 mystrrep(entry, "&", "&"); 1834 mystrrep(entry, "<", "<"); 1835 r.append(entry); 1836 1837 r.append("</a>"); 1838 } 1839 r.append("</code>"); 1840 slst.clear(); 1841 slst.push_back(r); 1842 return slst; 1843 } else if (check_xml_par(in_word, qpos, "type=", "stem")) { 1844 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); 1845 if (!cw.empty()) 1846 return stem(cw); 1847 } else if (check_xml_par(in_word, qpos, "type=", "generate")) { 1848 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); 1849 if (cw.empty()) 1850 return slst; 1851 std::string::size_type q3pos = in_word.find("<word", q2pos + 1); 1852 if (q3pos != std::string::npos) { 1853 std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); 1854 if (!cw2.empty()) { 1855 return generate(cw, cw2); 1856 } 1857 } else { 1858 q2pos = in_word.find("<code", q2pos + 1); 1859 if (q2pos != std::string::npos) { 1860 std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>"); 1861 if (!slst2.empty()) { 1862 slst = generate(cw, slst2); 1863 uniqlist(slst); 1864 return slst; 1865 } 1866 } 1867 } 1868 } else if (check_xml_par(in_word, qpos, "type=", "add")) { 1869 std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); 1870 if (cw.empty()) 1871 return slst; 1872 std::string::size_type q3pos = in_word.find("<word", q2pos + 1); 1873 if (q3pos != std::string::npos) { 1874 std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); 1875 if (!cw2.empty()) { 1876 add_with_affix(cw, cw2); 1877 } else { 1878 add(cw); 1879 } 1880 } else { 1881 add(cw); 1882 } 1883 } 1884 return slst; 1885 } 1886 1887 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) { 1888 std::vector<std::string> slst; 1889 struct hentry* he = NULL; 1890 int len; 1891 std::string w2; 1892 const char* word; 1893 const char* ignoredchars = pAMgr->get_ignore(); 1894 if (ignoredchars != NULL) { 1895 w2.assign(root_word); 1896 if (utf8) { 1897 const std::vector<w_char>& ignoredchars_utf16 = 1898 pAMgr->get_ignore_utf16(); 1899 remove_ignored_chars_utf(w2, ignoredchars_utf16); 1900 } else { 1901 remove_ignored_chars(w2, ignoredchars); 1902 } 1903 word = w2.c_str(); 1904 } else 1905 word = root_word.c_str(); 1906 1907 len = strlen(word); 1908 1909 if (!len) 1910 return slst; 1911 1912 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { 1913 he = m_HMgrs[i]->lookup(word); 1914 } 1915 if (he) { 1916 slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str()); 1917 } 1918 return slst; 1919 } 1920 1921 namespace { 1922 int munge_vector(char*** slst, const std::vector<std::string>& items) { 1923 if (items.empty()) { 1924 *slst = NULL; 1925 return 0; 1926 } else { 1927 *slst = (char**)malloc(sizeof(char*) * items.size()); 1928 if (!*slst) 1929 return 0; 1930 for (size_t i = 0; i < items.size(); ++i) 1931 (*slst)[i] = mystrdup(items[i].c_str()); 1932 } 1933 return items.size(); 1934 } 1935 } 1936 1937 int HunspellImpl::spell(const char* word, int* info, char** root) { 1938 std::string sroot; 1939 bool ret = spell(word, info, root ? &sroot : NULL); 1940 if (root) { 1941 if (sroot.empty()) { 1942 *root = NULL; 1943 } else { 1944 *root = mystrdup(sroot.c_str()); 1945 } 1946 } 1947 return ret; 1948 } 1949 1950 int HunspellImpl::suggest(char*** slst, const char* word) { 1951 std::vector<std::string> suggests = suggest(word); 1952 return munge_vector(slst, suggests); 1953 } 1954 1955 int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) { 1956 std::vector<std::string> stems = suffix_suggest(root_word); 1957 return munge_vector(slst, stems); 1958 } 1959 1960 void HunspellImpl::free_list(char*** slst, int n) { 1961 if (slst && *slst) { 1962 for (int i = 0; i < n; i++) 1963 free((*slst)[i]); 1964 free(*slst); 1965 *slst = NULL; 1966 } 1967 } 1968 1969 char* HunspellImpl::get_dic_encoding() { 1970 return &encoding[0]; 1971 } 1972 1973 int HunspellImpl::analyze(char*** slst, const char* word) { 1974 std::vector<std::string> stems = analyze(word); 1975 return munge_vector(slst, stems); 1976 } 1977 1978 int HunspellImpl::stem(char*** slst, const char* word) { 1979 std::vector<std::string> stems = stem(word); 1980 return munge_vector(slst, stems); 1981 } 1982 1983 int HunspellImpl::stem(char*** slst, char** desc, int n) { 1984 std::vector<std::string> morph; 1985 morph.reserve(n); 1986 for (int i = 0; i < n; ++i) 1987 morph.push_back(desc[i]); 1988 1989 std::vector<std::string> stems = stem(morph); 1990 return munge_vector(slst, stems); 1991 } 1992 1993 int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) { 1994 std::vector<std::string> stems = generate(word, pattern); 1995 return munge_vector(slst, stems); 1996 } 1997 1998 int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) { 1999 std::vector<std::string> morph; 2000 morph.reserve(pln); 2001 for (int i = 0; i < pln; ++i) 2002 morph.push_back(pl[i]); 2003 2004 std::vector<std::string> stems = generate(word, morph); 2005 return munge_vector(slst, stems); 2006 } 2007 2008 const char* HunspellImpl::get_wordchars() const { 2009 return get_wordchars_cpp().c_str(); 2010 } 2011 2012 const char* HunspellImpl::get_version() const { 2013 return get_version_cpp().c_str(); 2014 } 2015 2016 int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) { 2017 std::string d; 2018 bool ret = input_conv(word, d); 2019 if (ret && d.size() < destsize) { 2020 strncpy(dest, d.c_str(), destsize); 2021 return 1; 2022 } 2023 return 0; 2024 } 2025 2026 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) 2027 : m_Impl(new HunspellImpl(affpath, dpath, key)) { 2028 } 2029 2030 Hunspell::~Hunspell() { 2031 delete m_Impl; 2032 } 2033 2034 // load extra dictionaries 2035 int Hunspell::add_dic(const char* dpath, const char* key) { 2036 return m_Impl->add_dic(dpath, key); 2037 } 2038 2039 bool Hunspell::spell(const std::string& word, int* info, std::string* root) { 2040 return m_Impl->spell(word, info, root); 2041 } 2042 2043 std::vector<std::string> Hunspell::suggest(const std::string& word) { 2044 return m_Impl->suggest(word); 2045 } 2046 2047 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) { 2048 return m_Impl->suffix_suggest(root_word); 2049 } 2050 2051 const std::string& Hunspell::get_dict_encoding() const { 2052 return m_Impl->get_dict_encoding(); 2053 } 2054 2055 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) { 2056 return m_Impl->stem(desc); 2057 } 2058 2059 std::vector<std::string> Hunspell::stem(const std::string& word) { 2060 return m_Impl->stem(word); 2061 } 2062 2063 const std::string& Hunspell::get_wordchars_cpp() const { 2064 return m_Impl->get_wordchars_cpp(); 2065 } 2066 2067 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const { 2068 return m_Impl->get_wordchars_utf16(); 2069 } 2070 2071 int Hunspell::add(const std::string& word) { 2072 return m_Impl->add(word); 2073 } 2074 2075 int Hunspell::add_with_affix(const std::string& word, const std::string& example) { 2076 return m_Impl->add_with_affix(word, example); 2077 } 2078 2079 int Hunspell::remove(const std::string& word) { 2080 return m_Impl->remove(word); 2081 } 2082 2083 const std::string& Hunspell::get_version_cpp() const { 2084 return m_Impl->get_version_cpp(); 2085 } 2086 2087 struct cs_info* Hunspell::get_csconv() { 2088 return m_Impl->get_csconv(); 2089 } 2090 2091 std::vector<std::string> Hunspell::analyze(const std::string& word) { 2092 return m_Impl->analyze(word); 2093 } 2094 2095 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) { 2096 return m_Impl->generate(word, pl); 2097 } 2098 2099 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) { 2100 return m_Impl->generate(word, pattern); 2101 } 2102 2103 int Hunspell::get_langnum() const { 2104 return m_Impl->get_langnum(); 2105 } 2106 2107 bool Hunspell::input_conv(const std::string& word, std::string& dest) { 2108 return m_Impl->input_conv(word, dest); 2109 } 2110 2111 int Hunspell::spell(const char* word, int* info, char** root) { 2112 return m_Impl->spell(word, info, root); 2113 } 2114 2115 int Hunspell::suggest(char*** slst, const char* word) { 2116 return m_Impl->suggest(slst, word); 2117 } 2118 2119 int Hunspell::suffix_suggest(char*** slst, const char* root_word) { 2120 return m_Impl->suffix_suggest(slst, root_word); 2121 } 2122 2123 void Hunspell::free_list(char*** slst, int n) { 2124 m_Impl->free_list(slst, n); 2125 } 2126 2127 char* Hunspell::get_dic_encoding() { 2128 return m_Impl->get_dic_encoding(); 2129 } 2130 2131 int Hunspell::analyze(char*** slst, const char* word) { 2132 return m_Impl->analyze(slst, word); 2133 } 2134 2135 int Hunspell::stem(char*** slst, const char* word) { 2136 return m_Impl->stem(slst, word); 2137 } 2138 2139 int Hunspell::stem(char*** slst, char** desc, int n) { 2140 return m_Impl->stem(slst, desc, n); 2141 } 2142 2143 int Hunspell::generate(char*** slst, const char* word, const char* pattern) { 2144 return m_Impl->generate(slst, word, pattern); 2145 } 2146 2147 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { 2148 return m_Impl->generate(slst, word, pl, pln); 2149 } 2150 2151 const char* Hunspell::get_wordchars() const { 2152 return m_Impl->get_wordchars(); 2153 } 2154 2155 const char* Hunspell::get_version() const { 2156 return m_Impl->get_version(); 2157 } 2158 2159 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { 2160 return m_Impl->input_conv(word, dest, destsize); 2161 } 2162 2163 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { 2164 return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath)); 2165 } 2166 2167 Hunhandle* Hunspell_create_key(const char* affpath, 2168 const char* dpath, 2169 const char* key) { 2170 return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key)); 2171 } 2172 2173 void Hunspell_destroy(Hunhandle* pHunspell) { 2174 delete reinterpret_cast<HunspellImpl*>(pHunspell); 2175 } 2176 2177 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { 2178 return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath); 2179 } 2180 2181 int Hunspell_spell(Hunhandle* pHunspell, const char* word) { 2182 return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word); 2183 } 2184 2185 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { 2186 return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding(); 2187 } 2188 2189 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { 2190 return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word); 2191 } 2192 2193 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { 2194 return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word); 2195 } 2196 2197 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { 2198 return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word); 2199 } 2200 2201 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { 2202 return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n); 2203 } 2204 2205 int Hunspell_generate(Hunhandle* pHunspell, 2206 char*** slst, 2207 const char* word, 2208 const char* pattern) 2209 { 2210 return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern); 2211 } 2212 2213 int Hunspell_generate2(Hunhandle* pHunspell, 2214 char*** slst, 2215 const char* word, 2216 char** desc, 2217 int n) 2218 { 2219 return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n); 2220 } 2221 2222 /* functions for run-time modification of the dictionary */ 2223 2224 /* add word to the run-time dictionary */ 2225 2226 int Hunspell_add(Hunhandle* pHunspell, const char* word) { 2227 return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word); 2228 } 2229 2230 /* add word to the run-time dictionary with affix flags of 2231 * the example (a dictionary word): Hunspell will recognize 2232 * affixed forms of the new word, too. 2233 */ 2234 2235 int Hunspell_add_with_affix(Hunhandle* pHunspell, 2236 const char* word, 2237 const char* example) { 2238 return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example); 2239 } 2240 2241 /* remove word from the run-time dictionary */ 2242 2243 int Hunspell_remove(Hunhandle* pHunspell, const char* word) { 2244 return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word); 2245 } 2246 2247 void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) { 2248 reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n); 2249 }