hashmgr.cxx (44011B)
1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * Copyright (C) 2002-2022 Németh László 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 1.1 (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 17 * 18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, 19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, 20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, 21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, 22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen 23 * 24 * Alternatively, the contents of this file may be used under the terms of 25 * either the GNU General Public License Version 2 or later (the "GPL"), or 26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 * in which case the provisions of the GPL or the LGPL are applicable instead 28 * of those above. If you wish to allow use of your version of this file only 29 * under the terms of either the GPL or the LGPL, and not to allow others to 30 * use your version of this file under the terms of the MPL, indicate your 31 * decision by deleting the provisions above and replace them with the notice 32 * and other provisions required by the GPL or the LGPL. If you do not delete 33 * the provisions above, a recipient may use your version of this file under 34 * the terms of any one of the MPL, the GPL or the LGPL. 35 * 36 * ***** END LICENSE BLOCK ***** */ 37 /* 38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 39 * And Contributors. All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 52 * 3. All modifications to the source code must be clearly marked as 53 * such. Binary redistributions based on modified source code 54 * must be clearly marked as modified versions in the documentation 55 * and/or other materials provided with the distribution. 56 * 57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include <assert.h> 72 #include <stdlib.h> 73 #include <string.h> 74 #include <stdio.h> 75 #include <ctype.h> 76 #include <limits> 77 #include <sstream> 78 79 #include "hashmgr.hxx" 80 #include "csutil.hxx" 81 #include "atypes.hxx" 82 #include "langnum.hxx" 83 84 // build a hash table from a munched word list 85 86 HashMgr::HashMgr(const char* tpath, const char* apath, const char* key) 87 : tablesize(0), 88 tableptr(NULL), 89 flag_mode(FLAG_CHAR), 90 complexprefixes(0), 91 utf8(0), 92 forbiddenword(FORBIDDENWORD) // forbidden word signing flag 93 , 94 numaliasf(0), 95 aliasf(NULL), 96 aliasflen(0), 97 numaliasm(0), 98 aliasm(NULL) { 99 langnum = 0; 100 csconv = 0; 101 load_config(apath, key); 102 int ec = load_tables(tpath, key); 103 if (ec) { 104 /* error condition - what should we do here */ 105 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec); 106 free(tableptr); 107 //keep tablesize to 1 to fix possible division with zero 108 tablesize = 1; 109 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); 110 if (!tableptr) { 111 tablesize = 0; 112 } 113 } 114 } 115 116 HashMgr::~HashMgr() { 117 if (tableptr) { 118 // now pass through hash table freeing up everything 119 // go through column by column of the table 120 for (int i = 0; i < tablesize; i++) { 121 struct hentry* pt = tableptr[i]; 122 struct hentry* nt = NULL; 123 while (pt) { 124 nt = pt->next; 125 if (pt->astr && 126 (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) 127 arena_free(pt->astr); 128 arena_free(pt); 129 pt = nt; 130 } 131 } 132 free(tableptr); 133 } 134 tablesize = 0; 135 136 if (aliasf) { 137 for (int j = 0; j < (numaliasf); j++) 138 arena_free(aliasf[j]); 139 arena_free(aliasf); 140 aliasf = NULL; 141 if (aliasflen) { 142 arena_free(aliasflen); 143 aliasflen = NULL; 144 } 145 } 146 if (aliasm) { 147 for (int j = 0; j < (numaliasm); j++) 148 arena_free(aliasm[j]); 149 arena_free(aliasm); 150 aliasm = NULL; 151 } 152 153 #ifndef OPENOFFICEORG 154 #ifndef MOZILLA_CLIENT 155 if (utf8) 156 free_utf_tbl(); 157 #endif 158 #endif 159 160 #ifdef MOZILLA_CLIENT 161 delete[] csconv; 162 #endif 163 164 assert(outstanding_arena_allocations == 0); 165 } 166 167 // lookup a root word in the hashtable 168 169 struct hentry* HashMgr::lookup(const char* word) const { 170 struct hentry* dp; 171 if (tableptr) { 172 dp = tableptr[hash(word)]; 173 if (!dp) 174 return NULL; 175 for (; dp != NULL; dp = dp->next) { 176 if (strcmp(word, dp->word) == 0) 177 return dp; 178 } 179 } 180 return NULL; 181 } 182 183 // add a word to the hash table (private) 184 int HashMgr::add_word(const std::string& in_word, 185 int wcl, 186 unsigned short* aff, 187 int al, 188 const std::string* in_desc, 189 bool onlyupcase, 190 int captype) { 191 const std::string* word = &in_word; 192 const std::string* desc = in_desc; 193 194 std::string *word_copy = NULL; 195 std::string *desc_copy = NULL; 196 if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) { 197 word_copy = new std::string(in_word); 198 199 if (!ignorechars.empty()) { 200 if (utf8) { 201 wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16); 202 } else { 203 remove_ignored_chars(*word_copy, ignorechars); 204 } 205 } 206 207 if (complexprefixes) { 208 if (utf8) 209 wcl = reverseword_utf(*word_copy); 210 else 211 reverseword(*word_copy); 212 213 if (in_desc && !aliasm) { 214 desc_copy = new std::string(*in_desc); 215 216 if (complexprefixes) { 217 if (utf8) 218 reverseword_utf(*desc_copy); 219 else 220 reverseword(*desc_copy); 221 } 222 desc = desc_copy; 223 } 224 } 225 226 word = word_copy; 227 } 228 229 bool upcasehomonym = false; 230 int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0; 231 // variable-length hash record with word and optional fields 232 struct hentry* hp = 233 (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); 234 if (!hp) { 235 delete desc_copy; 236 delete word_copy; 237 return 1; 238 } 239 240 char* hpw = hp->word; 241 strcpy(hpw, word->c_str()); 242 243 int i = hash(hpw); 244 245 hp->blen = (unsigned char)word->size(); 246 hp->clen = (unsigned char)wcl; 247 hp->alen = (short)al; 248 hp->astr = aff; 249 hp->next = NULL; 250 hp->next_homonym = NULL; 251 hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0; 252 253 // store the description string or its pointer 254 if (desc) { 255 hp->var |= H_OPT; 256 if (aliasm) { 257 hp->var |= H_OPT_ALIASM; 258 store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str()))); 259 } else { 260 strcpy(hpw + word->size() + 1, desc->c_str()); 261 } 262 if (strstr(HENTRY_DATA(hp), MORPH_PHON)) { 263 hp->var |= H_OPT_PHON; 264 // store ph: fields (pronounciation, misspellings, old orthography etc.) 265 // of a morphological description in reptable to use in REP replacements. 266 if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO)) 267 reptable.reserve(tablesize/MORPH_PHON_RATIO); 268 std::string fields = HENTRY_DATA(hp); 269 std::string::const_iterator iter = fields.begin(); 270 std::string::const_iterator start_piece = mystrsep(fields, iter); 271 while (start_piece != fields.end()) { 272 if (std::string(start_piece, iter).find(MORPH_PHON) == 0) { 273 std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1); 274 if (ph.size() > 0) { 275 std::vector<w_char> w; 276 size_t strippatt; 277 std::string wordpart; 278 // dictionary based REP replacement, separated by "->" 279 // for example "pretty ph:prity ph:priti->pretti" to handle 280 // both prity -> pretty and pritier -> prettiest suggestions. 281 if (((strippatt = ph.find("->")) != std::string::npos) && 282 (strippatt > 0) && (strippatt < ph.size() - 2)) { 283 wordpart = ph.substr(strippatt + 2); 284 ph.erase(ph.begin() + strippatt, ph.end()); 285 } else 286 wordpart = in_word; 287 // when the ph: field ends with the character *, 288 // strip last character of the pattern and the replacement 289 // to match in REP suggestions also at character changes, 290 // for example, "pretty ph:prity*" results "prit->prett" 291 // REP replacement instead of "prity->pretty", to get 292 // prity->pretty and pritiest->prettiest suggestions. 293 if (ph.at(ph.size()-1) == '*') { 294 strippatt = 1; 295 size_t stripword = 0; 296 if (utf8) { 297 while ((strippatt < ph.size()) && 298 ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80)) 299 ++strippatt; 300 while ((stripword < wordpart.size()) && 301 ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80)) 302 ++stripword; 303 } 304 ++strippatt; 305 ++stripword; 306 if ((ph.size() > strippatt) && (wordpart.size() > stripword)) { 307 ph.erase(ph.size()-strippatt, strippatt); 308 wordpart.erase(in_word.size()-stripword, stripword); 309 } 310 } 311 // capitalize lowercase pattern for capitalized words to support 312 // good suggestions also for capitalized misspellings, eg. 313 // Wednesday ph:wendsay 314 // results wendsay -> Wednesday and Wendsay -> Wednesday, too. 315 if (captype==INITCAP) { 316 std::string ph_capitalized; 317 if (utf8) { 318 u8_u16(w, ph); 319 if (get_captype_utf8(w, langnum) == NOCAP) { 320 mkinitcap_utf(w, langnum); 321 u16_u8(ph_capitalized, w); 322 } 323 } else if (get_captype(ph, csconv) == NOCAP) 324 mkinitcap(ph_capitalized, csconv); 325 326 if (ph_capitalized.size() > 0) { 327 // add also lowercase word in the case of German or 328 // Hungarian to support lowercase suggestions lowercased by 329 // compound word generation or derivational suffixes 330 // (for example by adjectival suffix "-i" of geographical 331 // names in Hungarian: 332 // Massachusetts ph:messzecsuzec 333 // messzecsuzeci -> massachusettsi (adjective) 334 // For lowercasing by conditional PFX rules, see 335 // tests/germancompounding test example or the 336 // Hungarian dictionary.) 337 if (langnum == LANG_de || langnum == LANG_hu) { 338 std::string wordpart_lower(wordpart); 339 if (utf8) { 340 u8_u16(w, wordpart_lower); 341 mkallsmall_utf(w, langnum); 342 u16_u8(wordpart_lower, w); 343 } else { 344 mkallsmall(wordpart_lower, csconv); 345 } 346 reptable.push_back(replentry()); 347 reptable.back().pattern.assign(ph); 348 reptable.back().outstrings[0].assign(wordpart_lower); 349 } 350 reptable.push_back(replentry()); 351 reptable.back().pattern.assign(ph_capitalized); 352 reptable.back().outstrings[0].assign(wordpart); 353 } 354 } 355 reptable.push_back(replentry()); 356 reptable.back().pattern.assign(ph); 357 reptable.back().outstrings[0].assign(wordpart); 358 } 359 } 360 start_piece = mystrsep(fields, iter); 361 } 362 } 363 } 364 365 struct hentry* dp = tableptr[i]; 366 if (!dp) { 367 tableptr[i] = hp; 368 delete desc_copy; 369 delete word_copy; 370 return 0; 371 } 372 while (dp->next != NULL) { 373 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { 374 // remove hidden onlyupcase homonym 375 if (!onlyupcase) { 376 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 377 arena_free(dp->astr); 378 dp->astr = hp->astr; 379 dp->alen = hp->alen; 380 arena_free(hp); 381 delete desc_copy; 382 delete word_copy; 383 return 0; 384 } else { 385 dp->next_homonym = hp; 386 } 387 } else { 388 upcasehomonym = true; 389 } 390 } 391 dp = dp->next; 392 } 393 if (strcmp(hp->word, dp->word) == 0) { 394 // remove hidden onlyupcase homonym 395 if (!onlyupcase) { 396 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 397 arena_free(dp->astr); 398 dp->astr = hp->astr; 399 dp->alen = hp->alen; 400 arena_free(hp); 401 delete desc_copy; 402 delete word_copy; 403 return 0; 404 } else { 405 dp->next_homonym = hp; 406 } 407 } else { 408 upcasehomonym = true; 409 } 410 } 411 if (!upcasehomonym) { 412 dp->next = hp; 413 } else { 414 // remove hidden onlyupcase homonym 415 if (hp->astr) 416 arena_free(hp->astr); 417 arena_free(hp); 418 } 419 420 delete desc_copy; 421 delete word_copy; 422 return 0; 423 } 424 425 int HashMgr::add_hidden_capitalized_word(const std::string& word, 426 int wcl, 427 unsigned short* flags, 428 int flagslen, 429 const std::string* dp, 430 int captype) { 431 if (flags == NULL) 432 flagslen = 0; 433 434 // add inner capitalized forms to handle the following allcap forms: 435 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG 436 // Allcaps with suffixes: CIA's -> CIA'S 437 if (((captype == HUHCAP) || (captype == HUHINITCAP) || 438 ((captype == ALLCAP) && (flagslen != 0))) && 439 !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { 440 unsigned short* flags2 = 441 (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); 442 if (!flags2) 443 return 1; 444 if (flagslen) 445 memcpy(flags2, flags, flagslen * sizeof(unsigned short)); 446 flags2[flagslen] = ONLYUPCASEFLAG; 447 if (utf8) { 448 std::string st; 449 std::vector<w_char> w; 450 u8_u16(w, word); 451 mkallsmall_utf(w, langnum); 452 mkinitcap_utf(w, langnum); 453 u16_u8(st, w); 454 return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP); 455 } else { 456 std::string new_word(word); 457 mkallsmall(new_word, csconv); 458 mkinitcap(new_word, csconv); 459 int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP); 460 return ret; 461 } 462 } 463 return 0; 464 } 465 466 // detect captype and modify word length for UTF-8 encoding 467 int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) { 468 int len; 469 if (utf8) { 470 len = u8_u16(workbuf, word); 471 *captype = get_captype_utf8(workbuf, langnum); 472 } else { 473 len = word.size(); 474 *captype = get_captype(word, csconv); 475 } 476 return len; 477 } 478 479 int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { 480 std::vector<w_char> workbuf; 481 return get_clen_and_captype(word, captype, workbuf); 482 } 483 484 // remove word (personal dictionary function for standalone applications) 485 int HashMgr::remove(const std::string& word) { 486 struct hentry* dp = lookup(word.c_str()); 487 while (dp) { 488 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { 489 unsigned short* flags = 490 (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); 491 if (!flags) 492 return 1; 493 for (int i = 0; i < dp->alen; i++) 494 flags[i] = dp->astr[i]; 495 flags[dp->alen] = forbiddenword; 496 arena_free(dp->astr); 497 dp->astr = flags; 498 dp->alen++; 499 std::sort(flags, flags + dp->alen); 500 } 501 dp = dp->next_homonym; 502 } 503 return 0; 504 } 505 506 /* remove forbidden flag to add a personal word to the hash */ 507 int HashMgr::remove_forbidden_flag(const std::string& word) { 508 struct hentry* dp = lookup(word.c_str()); 509 if (!dp) 510 return 1; 511 while (dp) { 512 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) 513 dp->alen = 0; // XXX forbidden words of personal dic. 514 dp = dp->next_homonym; 515 } 516 return 0; 517 } 518 519 // add a custom dic. word to the hash table (public) 520 int HashMgr::add(const std::string& word) { 521 if (remove_forbidden_flag(word)) { 522 int captype; 523 int al = 0; 524 unsigned short* flags = NULL; 525 int wcl = get_clen_and_captype(word, &captype); 526 add_word(word, wcl, flags, al, NULL, false, captype); 527 return add_hidden_capitalized_word(word, wcl, flags, al, NULL, 528 captype); 529 } 530 return 0; 531 } 532 533 int HashMgr::add_with_affix(const std::string& word, const std::string& example) { 534 // detect captype and modify word length for UTF-8 encoding 535 struct hentry* dp = lookup(example.c_str()); 536 remove_forbidden_flag(word); 537 if (dp && dp->astr) { 538 int captype; 539 int wcl = get_clen_and_captype(word, &captype); 540 if (aliasf) { 541 add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); 542 } else { 543 unsigned short* flags = 544 (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); 545 if (flags) { 546 memcpy((void*)flags, (void*)dp->astr, 547 dp->alen * sizeof(unsigned short)); 548 add_word(word, wcl, flags, dp->alen, NULL, false, captype); 549 } else 550 return 1; 551 } 552 return add_hidden_capitalized_word(word, wcl, dp->astr, 553 dp->alen, NULL, captype); 554 } 555 return 1; 556 } 557 558 // walk the hash table entry by entry - null at end 559 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); 560 struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const { 561 if (hp && hp->next != NULL) 562 return hp->next; 563 for (col++; col < tablesize; col++) { 564 if (tableptr[col]) 565 return tableptr[col]; 566 } 567 // null at end and reset to start 568 col = -1; 569 return NULL; 570 } 571 572 // load a munched word list and build a hash table on the fly 573 int HashMgr::load_tables(const char* tpath, const char* key) { 574 // open dictionary file 575 FileMgr* dict = new FileMgr(tpath, key); 576 if (dict == NULL) 577 return 1; 578 579 // first read the first line of file to get hash table size */ 580 std::string ts; 581 if (!dict->getline(ts)) { 582 HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); 583 delete dict; 584 return 2; 585 } 586 mychomp(ts); 587 588 /* remove byte order mark */ 589 if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) { 590 ts.erase(0, 3); 591 } 592 593 tablesize = atoi(ts.c_str()); 594 595 int nExtra = 5 + USERWORD; 596 597 if (tablesize <= 0 || 598 (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / 599 int(sizeof(struct hentry*)))) { 600 HUNSPELL_WARNING( 601 stderr, "error: line 1: missing or bad word count in the dic file\n"); 602 delete dict; 603 return 4; 604 } 605 tablesize += nExtra; 606 if ((tablesize % 2) == 0) 607 tablesize++; 608 609 // allocate the hash table 610 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); 611 if (!tableptr) { 612 delete dict; 613 return 3; 614 } 615 616 // loop through all words on much list and add to hash 617 // table and create word and affix strings 618 619 std::vector<w_char> workbuf; 620 621 while (dict->getline(ts)) { 622 mychomp(ts); 623 // split each line into word and morphological description 624 size_t dp_pos = 0; 625 while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) { 626 if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) { 627 for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos) 628 ; 629 if (dp_pos == 0) { // missing word 630 dp_pos = std::string::npos; 631 } else { 632 ++dp_pos; 633 } 634 break; 635 } 636 ++dp_pos; 637 } 638 639 // tabulator is the old morphological field separator 640 size_t dp2_pos = ts.find('\t'); 641 if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) { 642 dp_pos = dp2_pos + 1; 643 } 644 645 std::string dp; 646 if (dp_pos != std::string::npos) { 647 dp.assign(ts.substr(dp_pos)); 648 ts.resize(dp_pos - 1); 649 } 650 651 // split each line into word and affix char strings 652 // "\/" signs slash in words (not affix separator) 653 // "/" at beginning of the line is word character (not affix separator) 654 size_t ap_pos = ts.find('/'); 655 while (ap_pos != std::string::npos) { 656 if (ap_pos == 0) { 657 ++ap_pos; 658 continue; 659 } else if (ts[ap_pos - 1] != '\\') 660 break; 661 // replace "\/" with "/" 662 ts.erase(ap_pos - 1, 1); 663 ap_pos = ts.find('/', ap_pos); 664 } 665 666 unsigned short* flags; 667 int al; 668 if (ap_pos != std::string::npos && ap_pos != ts.size()) { 669 std::string ap(ts.substr(ap_pos + 1)); 670 ts.resize(ap_pos); 671 if (aliasf) { 672 int index = atoi(ap.c_str()); 673 al = get_aliasf(index, &flags, dict); 674 if (!al) { 675 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", 676 dict->getlinenum()); 677 } 678 } else { 679 al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); 680 if (al == -1) { 681 HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); 682 delete dict; 683 return 6; 684 } 685 std::sort(flags, flags + al); 686 } 687 } else { 688 al = 0; 689 flags = NULL; 690 } 691 692 int captype; 693 int wcl = get_clen_and_captype(ts, &captype, workbuf); 694 const std::string *dp_str = dp.empty() ? NULL : &dp; 695 // add the word and its index plus its capitalized form optionally 696 if (add_word(ts, wcl, flags, al, dp_str, false, captype) || 697 add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) { 698 delete dict; 699 return 5; 700 } 701 } 702 703 delete dict; 704 return 0; 705 } 706 707 // the hash function is a simple load and rotate 708 // algorithm borrowed 709 int HashMgr::hash(const char* word) const { 710 unsigned long hv = 0; 711 for (int i = 0; i < 4 && *word != 0; i++) 712 hv = (hv << 8) | (*word++); 713 while (*word != 0) { 714 ROTATE(hv, ROTATE_LEN); 715 hv ^= (*word++); 716 } 717 return (unsigned long)hv % tablesize; 718 } 719 720 int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { 721 auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; 722 int len; 723 if (flags.empty()) { 724 *result = NULL; 725 return 0; 726 } 727 switch (flag_mode) { 728 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) 729 len = flags.size(); 730 if (len % 2 == 1) 731 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", 732 af->getlinenum()); 733 len /= 2; 734 *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 735 if (!*result) 736 return -1; 737 for (int i = 0; i < len; i++) { 738 (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + 739 (unsigned char)flags[i * 2 + 1]; 740 } 741 break; 742 } 743 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 744 // 23 233) 745 len = 1; 746 unsigned short* dest; 747 for (size_t i = 0; i < flags.size(); ++i) { 748 if (flags[i] == ',') 749 len++; 750 } 751 *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 752 if (!*result) 753 return -1; 754 dest = *result; 755 const char* src = flags.c_str(); 756 for (const char* p = src; *p; p++) { 757 if (*p == ',') { 758 int i = atoi(src); 759 if (i >= DEFAULTFLAGS) 760 HUNSPELL_WARNING( 761 stderr, "error: line %d: flag id %d is too large (max: %d)\n", 762 af->getlinenum(), i, DEFAULTFLAGS - 1); 763 *dest = (unsigned short)i; 764 if (*dest == 0) 765 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", 766 af->getlinenum()); 767 src = p + 1; 768 dest++; 769 } 770 } 771 int i = atoi(src); 772 if (i >= DEFAULTFLAGS) 773 HUNSPELL_WARNING(stderr, 774 "error: line %d: flag id %d is too large (max: %d)\n", 775 af->getlinenum(), i, DEFAULTFLAGS - 1); 776 *dest = (unsigned short)i; 777 if (*dest == 0) 778 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", 779 af->getlinenum()); 780 break; 781 } 782 case FLAG_UNI: { // UTF-8 characters 783 std::vector<w_char> w; 784 u8_u16(w, flags); 785 len = w.size(); 786 *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 787 if (!*result) 788 return -1; 789 memcpy(*result, w.data(), len * sizeof(short)); 790 break; 791 } 792 default: { // Ispell's one-character flags (erfg -> e r f g) 793 unsigned short* dest; 794 len = flags.size(); 795 *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 796 if (!*result) 797 return -1; 798 dest = *result; 799 for (size_t i = 0; i < flags.size(); ++i) { 800 *dest = (unsigned char)flags[i]; 801 dest++; 802 } 803 } 804 } 805 return len; 806 } 807 808 bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const { 809 if (flags.empty()) { 810 return false; 811 } 812 switch (flag_mode) { 813 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) 814 size_t len = flags.size(); 815 if (len % 2 == 1) 816 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", 817 af->getlinenum()); 818 len /= 2; 819 result.reserve(result.size() + len); 820 for (size_t i = 0; i < len; ++i) { 821 result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) + 822 (unsigned char)flags[i * 2 + 1]); 823 } 824 break; 825 } 826 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 827 // 23 233) 828 const char* src = flags.c_str(); 829 for (const char* p = src; *p; p++) { 830 if (*p == ',') { 831 int i = atoi(src); 832 if (i >= DEFAULTFLAGS) 833 HUNSPELL_WARNING( 834 stderr, "error: line %d: flag id %d is too large (max: %d)\n", 835 af->getlinenum(), i, DEFAULTFLAGS - 1); 836 result.push_back((unsigned short)i); 837 if (result.back() == 0) 838 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", 839 af->getlinenum()); 840 src = p + 1; 841 } 842 } 843 int i = atoi(src); 844 if (i >= DEFAULTFLAGS) 845 HUNSPELL_WARNING(stderr, 846 "error: line %d: flag id %d is too large (max: %d)\n", 847 af->getlinenum(), i, DEFAULTFLAGS - 1); 848 result.push_back((unsigned short)i); 849 if (result.back() == 0) 850 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", 851 af->getlinenum()); 852 break; 853 } 854 case FLAG_UNI: { // UTF-8 characters 855 std::vector<w_char> w; 856 u8_u16(w, flags); 857 size_t len = w.size(); 858 size_t origsize = result.size(); 859 result.resize(origsize + len); 860 memcpy(result.data() + origsize, w.data(), len * sizeof(short)); 861 break; 862 } 863 default: { // Ispell's one-character flags (erfg -> e r f g) 864 result.reserve(flags.size()); 865 for (size_t i = 0; i < flags.size(); ++i) { 866 result.push_back((unsigned char)flags[i]); 867 } 868 } 869 } 870 return true; 871 } 872 873 unsigned short HashMgr::decode_flag(const char* f) const { 874 unsigned short s = 0; 875 int i; 876 switch (flag_mode) { 877 case FLAG_LONG: 878 s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1]; 879 break; 880 case FLAG_NUM: 881 i = atoi(f); 882 if (i >= DEFAULTFLAGS) 883 HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", 884 i, DEFAULTFLAGS - 1); 885 s = (unsigned short)i; 886 break; 887 case FLAG_UNI: { 888 std::vector<w_char> w; 889 u8_u16(w, f); 890 if (!w.empty()) 891 memcpy(&s, w.data(), 1 * sizeof(short)); 892 break; 893 } 894 default: 895 s = *(unsigned char*)f; 896 } 897 if (s == 0) 898 HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 899 return s; 900 } 901 902 // This function is only called by external consumers, and so using the default 903 // allocator with mystrdup is correct. 904 char* HashMgr::encode_flag(unsigned short f) const { 905 if (f == 0) 906 return mystrdup("(NULL)"); 907 std::string ch; 908 if (flag_mode == FLAG_LONG) { 909 ch.push_back((unsigned char)(f >> 8)); 910 ch.push_back((unsigned char)(f - ((f >> 8) << 8))); 911 } else if (flag_mode == FLAG_NUM) { 912 std::ostringstream stream; 913 stream << f; 914 ch = stream.str(); 915 } else if (flag_mode == FLAG_UNI) { 916 const w_char* w_c = (const w_char*)&f; 917 std::vector<w_char> w(w_c, w_c + 1); 918 u16_u8(ch, w); 919 } else { 920 ch.push_back((unsigned char)(f)); 921 } 922 return mystrdup(ch.c_str()); 923 } 924 925 // read in aff file and set flag mode 926 int HashMgr::load_config(const char* affpath, const char* key) { 927 int firstline = 1; 928 929 // open the affix file 930 FileMgr* afflst = new FileMgr(affpath, key); 931 if (!afflst) { 932 HUNSPELL_WARNING( 933 stderr, "Error - could not open affix description file %s\n", affpath); 934 return 1; 935 } 936 937 // read in each line ignoring any that do not 938 // start with a known line type indicator 939 940 std::string line; 941 while (afflst->getline(line)) { 942 mychomp(line); 943 944 /* remove byte order mark */ 945 if (firstline) { 946 firstline = 0; 947 if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) { 948 line.erase(0, 3); 949 } 950 } 951 952 /* parse in the try string */ 953 if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) { 954 if (flag_mode != FLAG_CHAR) { 955 HUNSPELL_WARNING(stderr, 956 "error: line %d: multiple definitions of the FLAG " 957 "affix file parameter\n", 958 afflst->getlinenum()); 959 } 960 if (line.find("long") != std::string::npos) 961 flag_mode = FLAG_LONG; 962 if (line.find("num") != std::string::npos) 963 flag_mode = FLAG_NUM; 964 if (line.find("UTF-8") != std::string::npos) 965 flag_mode = FLAG_UNI; 966 if (flag_mode == FLAG_CHAR) { 967 HUNSPELL_WARNING( 968 stderr, 969 "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", 970 afflst->getlinenum()); 971 } 972 } 973 974 if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) { 975 std::string st; 976 if (!parse_string(line, st, afflst->getlinenum())) { 977 delete afflst; 978 return 1; 979 } 980 forbiddenword = decode_flag(st.c_str()); 981 } 982 983 if (line.compare(0, 3, "SET", 3) == 0) { 984 if (!parse_string(line, enc, afflst->getlinenum())) { 985 delete afflst; 986 return 1; 987 } 988 if (enc == "UTF-8") { 989 utf8 = 1; 990 #ifndef OPENOFFICEORG 991 #ifndef MOZILLA_CLIENT 992 initialize_utf_tbl(); 993 #endif 994 #endif 995 } else 996 csconv = get_current_cs(enc); 997 } 998 999 if (line.compare(0, 4, "LANG", 4) == 0) { 1000 if (!parse_string(line, lang, afflst->getlinenum())) { 1001 delete afflst; 1002 return 1; 1003 } 1004 langnum = get_lang_num(lang); 1005 } 1006 1007 /* parse in the ignored characters (for example, Arabic optional diacritics 1008 * characters */ 1009 if (line.compare(0, 6, "IGNORE", 6) == 0) { 1010 if (!parse_array(line, ignorechars, ignorechars_utf16, 1011 utf8, afflst->getlinenum())) { 1012 delete afflst; 1013 return 1; 1014 } 1015 } 1016 1017 if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) { 1018 if (!parse_aliasf(line, afflst)) { 1019 delete afflst; 1020 return 1; 1021 } 1022 } 1023 1024 if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) { 1025 if (!parse_aliasm(line, afflst)) { 1026 delete afflst; 1027 return 1; 1028 } 1029 } 1030 1031 if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0) 1032 complexprefixes = 1; 1033 1034 /* parse in the typical fault correcting table */ 1035 if (line.compare(0, 3, "REP", 3) == 0) { 1036 if (!parse_reptable(line, afflst)) { 1037 delete afflst; 1038 return 1; 1039 } 1040 } 1041 1042 // don't check the full affix file, yet 1043 if (((line.compare(0, 3, "SFX", 3) == 0) || 1044 (line.compare(0, 3, "PFX", 3) == 0)) && 1045 line.size() > 3 && isspace(line[3]) && 1046 !reptable.empty()) // (REP table is in the end of Afrikaans aff file) 1047 break; 1048 } 1049 1050 if (csconv == NULL) 1051 csconv = get_current_cs(SPELL_ENCODING); 1052 delete afflst; 1053 return 0; 1054 } 1055 1056 /* parse in the ALIAS table */ 1057 bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) { 1058 if (numaliasf != 0) { 1059 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", 1060 af->getlinenum()); 1061 return false; 1062 } 1063 int i = 0; 1064 int np = 0; 1065 std::string::const_iterator iter = line.begin(); 1066 std::string::const_iterator start_piece = mystrsep(line, iter); 1067 while (start_piece != line.end()) { 1068 switch (i) { 1069 case 0: { 1070 np++; 1071 break; 1072 } 1073 case 1: { 1074 numaliasf = atoi(std::string(start_piece, iter).c_str()); 1075 if (numaliasf < 1) { 1076 numaliasf = 0; 1077 aliasf = NULL; 1078 aliasflen = NULL; 1079 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", 1080 af->getlinenum()); 1081 return false; 1082 } 1083 aliasf = 1084 (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); 1085 aliasflen = 1086 (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); 1087 if (!aliasf || !aliasflen) { 1088 numaliasf = 0; 1089 if (aliasf) 1090 arena_free(aliasf); 1091 if (aliasflen) 1092 arena_free(aliasflen); 1093 aliasf = NULL; 1094 aliasflen = NULL; 1095 return false; 1096 } 1097 np++; 1098 break; 1099 } 1100 default: 1101 break; 1102 } 1103 ++i; 1104 start_piece = mystrsep(line, iter); 1105 } 1106 if (np != 2) { 1107 numaliasf = 0; 1108 arena_free(aliasf); 1109 arena_free(aliasflen); 1110 aliasf = NULL; 1111 aliasflen = NULL; 1112 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", 1113 af->getlinenum()); 1114 return false; 1115 } 1116 1117 /* now parse the numaliasf lines to read in the remainder of the table */ 1118 for (int j = 0; j < numaliasf; j++) { 1119 std::string nl; 1120 aliasf[j] = NULL; 1121 aliasflen[j] = 0; 1122 i = 0; 1123 if (af->getline(nl)) { 1124 mychomp(nl); 1125 iter = nl.begin(); 1126 start_piece = mystrsep(nl, iter); 1127 bool errored = false; 1128 while (!errored && start_piece != nl.end()) { 1129 switch (i) { 1130 case 0: { 1131 if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) { 1132 errored = true; 1133 break; 1134 } 1135 break; 1136 } 1137 case 1: { 1138 std::string piece(start_piece, iter); 1139 aliasflen[j] = 1140 (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); 1141 std::sort(aliasf[j], aliasf[j] + aliasflen[j]); 1142 break; 1143 } 1144 default: 1145 break; 1146 } 1147 ++i; 1148 start_piece = mystrsep(nl, iter); 1149 } 1150 } 1151 if (!aliasf[j]) { 1152 for (int k = 0; k < j; ++k) { 1153 arena_free(aliasf[k]); 1154 } 1155 arena_free(aliasf); 1156 arena_free(aliasflen); 1157 aliasf = NULL; 1158 aliasflen = NULL; 1159 numaliasf = 0; 1160 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", 1161 af->getlinenum()); 1162 return false; 1163 } 1164 } 1165 return true; 1166 } 1167 1168 int HashMgr::is_aliasf() const { 1169 return (aliasf != NULL); 1170 } 1171 1172 int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const { 1173 if ((index > 0) && (index <= numaliasf)) { 1174 *fvec = aliasf[index - 1]; 1175 return aliasflen[index - 1]; 1176 } 1177 HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", 1178 af->getlinenum(), index); 1179 *fvec = NULL; 1180 return 0; 1181 } 1182 1183 /* parse morph alias definitions */ 1184 bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) { 1185 if (numaliasm != 0) { 1186 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", 1187 af->getlinenum()); 1188 return false; 1189 } 1190 int i = 0; 1191 int np = 0; 1192 std::string::const_iterator iter = line.begin(); 1193 std::string::const_iterator start_piece = mystrsep(line, iter); 1194 while (start_piece != line.end()) { 1195 switch (i) { 1196 case 0: { 1197 np++; 1198 break; 1199 } 1200 case 1: { 1201 numaliasm = atoi(std::string(start_piece, iter).c_str()); 1202 if (numaliasm < 1) { 1203 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", 1204 af->getlinenum()); 1205 return false; 1206 } 1207 aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); 1208 if (!aliasm) { 1209 numaliasm = 0; 1210 return false; 1211 } 1212 np++; 1213 break; 1214 } 1215 default: 1216 break; 1217 } 1218 ++i; 1219 start_piece = mystrsep(line, iter); 1220 } 1221 if (np != 2) { 1222 numaliasm = 0; 1223 arena_free(aliasm); 1224 aliasm = NULL; 1225 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", 1226 af->getlinenum()); 1227 return false; 1228 } 1229 1230 /* now parse the numaliasm lines to read in the remainder of the table */ 1231 for (int j = 0; j < numaliasm; j++) { 1232 std::string nl; 1233 aliasm[j] = NULL; 1234 if (af->getline(nl)) { 1235 mychomp(nl); 1236 iter = nl.begin(); 1237 i = 0; 1238 start_piece = mystrsep(nl, iter); 1239 bool errored = false; 1240 while (!errored && start_piece != nl.end()) { 1241 switch (i) { 1242 case 0: { 1243 if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) { 1244 errored = true; 1245 break; 1246 } 1247 break; 1248 } 1249 case 1: { 1250 // add the remaining of the line 1251 std::string::const_iterator end = nl.end(); 1252 std::string chunk(start_piece, end); 1253 if (complexprefixes) { 1254 if (utf8) 1255 reverseword_utf(chunk); 1256 else 1257 reverseword(chunk); 1258 } 1259 size_t sl = chunk.length() + 1; 1260 aliasm[j] = (char*)arena_alloc(sl); 1261 if (aliasm[j]) { 1262 memcpy(aliasm[j], chunk.c_str(), sl); 1263 } 1264 break; 1265 } 1266 default: 1267 break; 1268 } 1269 ++i; 1270 start_piece = mystrsep(nl, iter); 1271 } 1272 } 1273 if (!aliasm[j]) { 1274 numaliasm = 0; 1275 for (int k = 0; k < j; ++k) { 1276 arena_free(aliasm[k]); 1277 } 1278 arena_free(aliasm); 1279 aliasm = NULL; 1280 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", 1281 af->getlinenum()); 1282 return false; 1283 } 1284 } 1285 return true; 1286 } 1287 1288 int HashMgr::is_aliasm() const { 1289 return (aliasm != NULL); 1290 } 1291 1292 char* HashMgr::get_aliasm(int index) const { 1293 if ((index > 0) && (index <= numaliasm)) 1294 return aliasm[index - 1]; 1295 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); 1296 return NULL; 1297 } 1298 1299 /* parse in the typical fault correcting table */ 1300 bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) { 1301 if (!reptable.empty()) { 1302 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", 1303 af->getlinenum()); 1304 return false; 1305 } 1306 int numrep = -1; 1307 int i = 0; 1308 int np = 0; 1309 std::string::const_iterator iter = line.begin(); 1310 std::string::const_iterator start_piece = mystrsep(line, iter); 1311 while (start_piece != line.end()) { 1312 switch (i) { 1313 case 0: { 1314 np++; 1315 break; 1316 } 1317 case 1: { 1318 numrep = atoi(std::string(start_piece, iter).c_str()); 1319 if (numrep < 1) { 1320 HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", 1321 af->getlinenum()); 1322 return false; 1323 } 1324 reptable.reserve(numrep); 1325 np++; 1326 break; 1327 } 1328 default: 1329 break; 1330 } 1331 ++i; 1332 start_piece = mystrsep(line, iter); 1333 } 1334 if (np != 2) { 1335 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", 1336 af->getlinenum()); 1337 return false; 1338 } 1339 1340 /* now parse the numrep lines to read in the remainder of the table */ 1341 for (int j = 0; j < numrep; ++j) { 1342 std::string nl; 1343 reptable.push_back(replentry()); 1344 int type = 0; 1345 if (af->getline(nl)) { 1346 mychomp(nl); 1347 iter = nl.begin(); 1348 i = 0; 1349 start_piece = mystrsep(nl, iter); 1350 bool errored = false; 1351 while (!errored && start_piece != nl.end()) { 1352 switch (i) { 1353 case 0: { 1354 if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) { 1355 errored = true; 1356 break; 1357 } 1358 break; 1359 } 1360 case 1: { 1361 if (*start_piece == '^') 1362 type = 1; 1363 reptable.back().pattern.assign(start_piece + type, iter); 1364 mystrrep(reptable.back().pattern, "_", " "); 1365 if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') { 1366 type += 2; 1367 reptable.back().pattern.resize(reptable.back().pattern.size() - 1); 1368 } 1369 break; 1370 } 1371 case 2: { 1372 reptable.back().outstrings[type].assign(start_piece, iter); 1373 mystrrep(reptable.back().outstrings[type], "_", " "); 1374 break; 1375 } 1376 default: 1377 break; 1378 } 1379 ++i; 1380 start_piece = mystrsep(nl, iter); 1381 } 1382 } 1383 if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) { 1384 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", 1385 af->getlinenum()); 1386 reptable.clear(); 1387 return false; 1388 } 1389 } 1390 return true; 1391 } 1392 1393 // return replacing table 1394 const std::vector<replentry>& HashMgr::get_reptable() const { 1395 return reptable; 1396 } 1397 1398 void* HashMgr::arena_alloc(int num_bytes) { 1399 static const int MIN_CHUNK_SIZE = 4096; 1400 if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { 1401 current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); 1402 arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); 1403 current_chunk_offset = 0; 1404 } 1405 1406 uint8_t* ptr = &arena.back()[current_chunk_offset]; 1407 current_chunk_offset += num_bytes; 1408 outstanding_arena_allocations++; 1409 return ptr; 1410 } 1411 1412 void HashMgr::arena_free(void* ptr) { 1413 --outstanding_arena_allocations; 1414 assert(outstanding_arena_allocations >= 0); 1415 }